From 883ed04f1b5fce77168d85925b89b6afd99365cf Mon Sep 17 00:00:00 2001
From: Raymond Douglass <ray@raydouglass.com>
Date: Thu, 23 Mar 2023 14:57:18 -0400
Subject: [PATCH 01/78] DOC: bump RAPIDS version references from 23.04 to 23.06

---
 .github/workflows/build.yaml                  | 16 +++++++-------
 .github/workflows/pr.yaml                     | 22 +++++++++----------
 .github/workflows/test.yaml                   |  8 +++----
 .../all_cuda-118_arch-x86_64.yaml             |  6 ++---
 cpp/CMakeLists.txt                            |  4 ++--
 cpp/doxygen/Doxyfile                          |  2 +-
 dependencies.yaml                             |  6 ++---
 docs/source/build.md                          |  4 ++--
 docs/source/conf.py                           |  4 ++--
 docs/source/developer_guide.md                | 18 +++++++--------
 fetch_rapids.cmake                            |  2 +-
 python/pylibraft/CMakeLists.txt               |  2 +-
 python/pylibraft/pylibraft/__init__.py        |  2 +-
 python/pylibraft/pyproject.toml               |  6 ++---
 python/raft-dask/CMakeLists.txt               |  2 +-
 python/raft-dask/pyproject.toml               |  8 +++----
 python/raft-dask/raft_dask/__init__.py        |  2 +-
 17 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 41b6a639d8..d22af4779e 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -56,7 +56,7 @@ jobs:
     if: github.ref_type == 'branch' && github.event_name == 'push'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
       build_type: branch
       node_type: "gpu-latest-1"
@@ -65,7 +65,7 @@ jobs:
       run_script: "ci/build_docs.sh"
   wheel-build-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
   wheel-publish-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-publish-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -101,7 +101,7 @@ jobs:
   wheel-publish-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 1d35611537..bf080d6ad2 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -23,41 +23,41 @@ jobs:
       - wheel-build-raft-dask
       - wheel-tests-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06
     with:
       enable_check_generated_files: false
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
     with:
       build_type: pull-request
       node_type: cpu16
   conda-cpp-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
     with:
       build_type: pull-request
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
     with:
       build_type: pull-request
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
       build_type: pull-request
       node_type: "gpu-latest-1"
@@ -67,7 +67,7 @@ jobs:
   wheel-build-pylibraft:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -77,7 +77,7 @@ jobs:
   wheel-tests-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -89,7 +89,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-tests-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: raft_dask
@@ -100,7 +100,7 @@ jobs:
   wheel-tests-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: raft_dask
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index aa7ca21b5f..f1207c3545 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -32,7 +32,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -44,7 +44,7 @@ jobs:
       test-unittest: "python -m pytest -v ./python/pylibraft/pylibraft/test"
   wheel-tests-raft-dask:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 47af29d9d2..9d447116a3 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -18,7 +18,7 @@ dependencies:
 - cupy
 - cxx-compiler
 - cython>=0.29,<0.30
-- dask-cuda=23.04
+- dask-cuda=23.06
 - dask>=2023.1.1
 - distributed>=2023.1.1
 - doxygen>=1.8.20
@@ -41,7 +41,7 @@ dependencies:
 - pytest
 - pytest-cov
 - recommonmark
-- rmm=23.04
+- rmm=23.06
 - scikit-build>=0.13.1
 - scikit-learn
 - scipy
@@ -49,6 +49,6 @@ dependencies:
 - sphinx-markdown-tables
 - sysroot_linux-64==2.17
 - ucx-proc=*=gpu
-- ucx-py=0.31.*
+- ucx-py=0.32.*
 - ucx>=1.13.0
 name: all_cuda-118_arch-x86_64
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2999045a0c..840321c3fa 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -10,8 +10,8 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
-set(RAPIDS_VERSION "23.04")
-set(RAFT_VERSION "23.04.00")
+set(RAPIDS_VERSION "23.06")
+set(RAFT_VERSION "23.06.00")
 
 cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 include(../fetch_rapids.cmake)
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index 2a92c67996..17a1e0caca 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "RAFT C++ API"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "23.04"
+PROJECT_NUMBER         = "23.06"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/dependencies.yaml b/dependencies.yaml
index 93893d07af..e920141a79 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -176,12 +176,12 @@ dependencies:
           - dask>=2023.1.1
           - distributed>=2023.1.1
           - ucx>=1.13.0
-          - ucx-py=0.31.*
+          - ucx-py=0.32.*
           - ucx-proc=*=gpu
-          - rmm=23.04
+          - rmm=23.06
           - libfaiss>=1.7.1=cuda*
           - faiss-proc=*=cuda
-          - dask-cuda=23.04
+          - dask-cuda=23.06
   test_python:
     common:
       - output_types: [conda, requirements]
diff --git a/docs/source/build.md b/docs/source/build.md
index 70b07f4e81..29d0a72a37 100644
--- a/docs/source/build.md
+++ b/docs/source/build.md
@@ -265,7 +265,7 @@ When the needed [build dependencies](#build-dependencies) are already satisfied,
 set(RAFT_GIT_DIR ${CMAKE_CURRENT_BINARY_DIR}/raft CACHE STRING "Path to RAFT repo")
 ExternalProject_Add(raft
   GIT_REPOSITORY    git@github.com:rapidsai/raft.git
-  GIT_TAG           branch-23.04
+  GIT_TAG           branch-23.06
   PREFIX            ${RAFT_GIT_DIR}
   CONFIGURE_COMMAND ""
   BUILD_COMMAND     ""
@@ -297,7 +297,7 @@ The following `cmake` snippet enables a flexible configuration of RAFT:
 
 ```cmake
 
-set(RAFT_VERSION "23.04")
+set(RAFT_VERSION "23.06")
 set(RAFT_FORK "rapidsai")
 set(RAFT_PINNED_TAG "branch-${RAFT_VERSION}")
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 33a8a9217a..f9054420ca 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -67,9 +67,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '23.04'
+version = '23.06'
 # The full version, including alpha/beta/rc tags.
-release = '23.04.00'
+release = '23.06.00'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md
index 56100b38f7..6f57453e28 100644
--- a/docs/source/developer_guide.md
+++ b/docs/source/developer_guide.md
@@ -140,13 +140,13 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour
 1. Do not split empty functions/records/namespaces.
 2. Two-space indentation everywhere, including the line continuations.
 3. Disable reflowing of comments.
-   The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/.clang-format).
+   The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/.clang-format).
 
 #### How is the check done?
-All formatting checks are done by this python script: [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) which is effectively a wrapper over `clang-format`. An error is raised if the code diverges from the format suggested by clang-format. It is expected that the developers run this script to detect and fix formatting violations before creating PR.
+All formatting checks are done by this Python script: [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) which is effectively a wrapper over `clang-format`. An error is raised if the code diverges from the format suggested by clang-format. It is expected that the developers run this script to detect and fix formatting violations before creating a PR.
 
 ##### As part of CI
-[run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) is executed as part of our `ci/checks/style.sh` CI test. If there are any formatting violations, PR author is expected to fix those to get CI passing. Steps needed to fix the formatting violations are described in the subsequent sub-section.
+[run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) is executed as part of our `ci/checks/style.sh` CI test. If there are any formatting violations, the PR author is expected to fix them to get CI passing. The steps needed to fix the formatting violations are described in the subsequent sub-section.
 
 ##### Manually
 Developers can also manually (or setup this command as part of git pre-commit hook) run this check by executing:
@@ -156,10 +156,10 @@ python ./cpp/scripts/run-clang-format.py
 From the root of the RAFT repository.
 
 #### How to know the formatting violations?
-When there are formatting errors, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) prints a `diff` command, showing where there are formatting differences. Unfortunately, unlike `flake8`, `clang-format` does NOT print descriptions of the violations, but instead directly formats the code. So, the only way currently to know about formatting differences is to run the diff command as suggested by this script against each violating source file.
+When there are formatting errors, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) prints a `diff` command, showing where there are formatting differences. Unfortunately, unlike `flake8`, `clang-format` does NOT print descriptions of the violations, but instead directly formats the code. So, the only way currently to know about formatting differences is to run the diff command as suggested by this script against each violating source file.
 
 #### How to fix the formatting violations?
-When there are formatting violations, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) prints at the end, the exact command that can be run by developers to fix them. This is the easiest way to fix formatting errors. [This screencast](https://asciinema.org/a/287367) shows how developers can check for formatting violations in their branches and also how to fix those, before sending out PRs.
+When there are formatting violations, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) prints at the end, the exact command that can be run by developers to fix them. This is the easiest way to fix formatting errors. [This screencast](https://asciinema.org/a/287367) shows how developers can check for formatting violations in their branches and also how to fix those, before sending out PRs.
 
 In short, to bulk-fix all the formatting violations, execute the following command:
 ```bash
@@ -168,13 +168,13 @@ python ./cpp/scripts/run-clang-format.py -inplace
 From the root of the RAFT repository.
 
 #### clang-format version?
-To avoid spurious code style violations we specify the exact clang-format version required, currently `11.1.0`. This is enforced by the [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) script itself. Refer [here](../build#build-dependencies) for the list of build-time dependencies.
+To avoid spurious code style violations we specify the exact clang-format version required, currently `11.1.0`. This is enforced by the [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) script itself. Refer [here](../build#build-dependencies) for the list of build-time dependencies.
 
 #### Additional scripts
 Along with clang, there are an include checker and copyright checker scripts for checking style, which can be performed as part of CI, as well as manually.
 
 ##### #include style
-[include_checker.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows:
+[include_checker.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/include_checker.py) is used to enforce the include style as follows:
 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies.
 2. `#include <...>` should be used for referencing everything else
 
@@ -184,7 +184,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list
 ```
 
 ##### Copyright header
-[copyright.py](https://github.com/rapidsai/raft/blob/branch-23.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files
+[copyright.py](https://github.com/rapidsai/raft/blob/branch-23.06/ci/checks/copyright.py) checks the Copyright header for all git-modified files
 
 Manually, you can run the following to bulk-fix the header if only the years need to be updated:
 ```bash
@@ -198,7 +198,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY`
 ## Logging
 
 ### Introduction
-Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all.
+Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all.
 
 ### Usage
 ```cpp
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
index 2d312bd3e5..c664fd1d9f 100644
--- a/fetch_rapids.cmake
+++ b/fetch_rapids.cmake
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
-  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.04/RAPIDS.cmake
+  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.06/RAPIDS.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake
   )
 endif()
diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt
index b12d0a63ea..77a2a7114e 100644
--- a/python/pylibraft/CMakeLists.txt
+++ b/python/pylibraft/CMakeLists.txt
@@ -14,7 +14,7 @@
 
 cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
-set(pylibraft_version 23.04.00)
+set(pylibraft_version 23.06.00)
 
 include(../../fetch_rapids.cmake)
 
diff --git a/python/pylibraft/pylibraft/__init__.py b/python/pylibraft/pylibraft/__init__.py
index 39145085f0..aebaa4e272 100644
--- a/python/pylibraft/pylibraft/__init__.py
+++ b/python/pylibraft/pylibraft/__init__.py
@@ -13,4 +13,4 @@
 # limitations under the License.
 #
 
-__version__ = "23.04.00"
+__version__ = "23.06.00"
diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml
index 7d92fd0763..785a6df6c8 100644
--- a/python/pylibraft/pyproject.toml
+++ b/python/pylibraft/pyproject.toml
@@ -22,13 +22,13 @@ requires = [
     "scikit-build>=0.13.1",
     "cmake>=3.23.1,!=3.25.0",
     "ninja",
-    "rmm==23.4.*",
+    "rmm==23.6.*",
 ]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "pylibraft"
-version = "23.04.00"
+version = "23.06.00"
 description = "RAFT: Reusable Algorithms Functions and other Tools"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -39,7 +39,7 @@ requires-python = ">=3.8"
 dependencies = [
     "numpy",
     "cuda-python>=11.7.1,<12.0",
-    "rmm==23.4.*",
+    "rmm==23.6.*",
 ]
 classifiers = [
     "Intended Audience :: Developers",
diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt
index 8486523226..816c68e83c 100644
--- a/python/raft-dask/CMakeLists.txt
+++ b/python/raft-dask/CMakeLists.txt
@@ -14,7 +14,7 @@
 
 cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
-set(raft_dask_version 23.04.00)
+set(raft_dask_version 23.06.00)
 
 include(../../fetch_rapids.cmake)
 
diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml
index 2fe6522f57..88ac8d80ac 100644
--- a/python/raft-dask/pyproject.toml
+++ b/python/raft-dask/pyproject.toml
@@ -25,7 +25,7 @@ requires = [
 
 [project]
 name = "raft-dask"
-version = "23.04.00"
+version = "23.06.00"
 description = "Reusable Accelerated Functions & Tools Dask Infrastructure"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -37,11 +37,11 @@ dependencies = [
     "numpy",
     "numba>=0.49",
     "joblib>=0.11",
-    "dask-cuda==23.4.*",
+    "dask-cuda==23.6.*",
     "dask>=2023.1.1",
-    "ucx-py==0.31.*",
+    "ucx-py==0.32.*",
     "distributed>=2023.1.1",
-    "pylibraft==23.4.*",
+    "pylibraft==23.6.*",
 ]
 classifiers = [
     "Intended Audience :: Developers",
diff --git a/python/raft-dask/raft_dask/__init__.py b/python/raft-dask/raft_dask/__init__.py
index 4f4700df48..9582da4851 100644
--- a/python/raft-dask/raft_dask/__init__.py
+++ b/python/raft-dask/raft_dask/__init__.py
@@ -13,4 +13,4 @@
 # limitations under the License.
 #
 
-__version__ = "23.04.00"
+__version__ = "23.06.00"

From a88072cbe4784a25d6efef05d4de4528dd4fa5ae Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
Date: Wed, 29 Mar 2023 21:17:48 -0400
Subject: [PATCH 02/78] Update rapids version

---
 cpp/template/cmake/thirdparty/fetch_rapids.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake
index 40ba83be9e..248f4f1af4 100644
--- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake
+++ b/cpp/template/cmake/thirdparty/fetch_rapids.cmake
@@ -12,7 +12,7 @@
 # the License.
 
 # Use this variable to update RAPIDS and RAFT versions
-set(RAPIDS_VERSION "23.04")
+set(RAPIDS_VERSION "23.06")
 
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
     file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake

From 5f0e66d1b18a8ce992db61add7e726ec4d5c2848 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
Date: Wed, 29 Mar 2023 21:38:40 -0400
Subject: [PATCH 03/78] Update pylibraft version

---
 dependencies.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dependencies.yaml b/dependencies.yaml
index 6a07cd890d..0460e2dd81 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -278,7 +278,7 @@ dependencies:
           - ucx-proc=*=gpu
       - output_types: pyproject
         packages:
-          - pylibraft==23.4.*
+          - pylibraft==23.6.*
   test_python_common:
     common:
       - output_types: [conda, requirements, pyproject]

From 3c5b8de4c791e2ab44cadbaa68acd29b8ef9dcbb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
Date: Wed, 29 Mar 2023 21:48:31 -0400
Subject: [PATCH 04/78] Run dfg (rapids-dependency-file-generator)

---
 python/raft-dask/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml
index 0ca9e7a876..1fb5aa8f7c 100644
--- a/python/raft-dask/pyproject.toml
+++ b/python/raft-dask/pyproject.toml
@@ -40,7 +40,7 @@ dependencies = [
     "joblib>=0.11",
     "numba>=0.49",
     "numpy>=1.21",
-    "pylibraft==23.4.*",
+    "pylibraft==23.6.*",
     "ucx-py==0.32.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [

From 9bac6d53d32567d218b8188c88f4cdf665625fe4 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 30 Mar 2023 09:40:42 -0400
Subject: [PATCH 05/78] Fix dask versions in wheel build preinstallation

---
 .github/workflows/pr.yaml   | 4 ++--
 .github/workflows/test.yaml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index bf080d6ad2..cf8f8cd4b5 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -105,7 +105,7 @@ jobs:
       build_type: pull-request
       package-name: raft_dask
       # Always want to test against latest dask/distributed.
-      test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
-      test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
+      test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
+      test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test"
       test-smoketest: "python ./ci/wheel_smoke_test_raft_dask.py"
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index f1207c3545..ebf596c958 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -51,6 +51,6 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       package-name: raft_dask
-      test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
-      test-before-arm64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
+      test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
+      test-before-arm64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test"

From 698d1dfa6e9b5f6069f88f9aad121aa394f1cc64 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 31 Mar 2023 19:12:15 -0400
Subject: [PATCH 06/78] Fix ucx-py pin in raft-dask recipe (#1396)

Update the ucx-py pinning for raft-dask 23.06

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Mark Sadang (https://github.com/msadang)

URL: https://github.com/rapidsai/raft/pull/1396
---
 conda/recipes/raft-dask/conda_build_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml
index 778b187870..4f88728f4b 100644
--- a/conda/recipes/raft-dask/conda_build_config.yaml
+++ b/conda/recipes/raft-dask/conda_build_config.yaml
@@ -14,7 +14,7 @@ ucx_version:
   - ">=1.13.0,<1.15.0"
 
 ucx_py_version:
-  - "0.31.*"
+  - "0.32.*"
 
 cmake_version:
   - ">=3.23.1,!=3.25.0"

From 9048dff15bfaa0bb9e30bb8458aa0f3380660af6 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 7 Apr 2023 08:22:33 -0400
Subject: [PATCH 07/78] Have consistent compile lines between BUILD_TESTS
 enabled or not (#1401)

This will remove 1h from our conda CI builds since we can now re-use the cached object files between `libraft` and `libraft-tests`

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Ben Frederickson (https://github.com/benfred)
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/raft/pull/1401
---
 cpp/CMakeLists.txt | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index f4e03249d4..144f58c4d6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -70,15 +70,12 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations"
        ${RAFT_COMPILE_LIBRARY_DEFAULT}
 )
 
-if(BUILD_TESTS
-   OR BUILD_PRIMS_BENCH
-   OR BUILD_ANN_BENCH
-)
-  # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs
-  # to have different values for the `Threads::Threads` target. Setting this flag ensures
-  # `Threads::Threads` is the same value in first run and subsequent runs.
-  set(THREADS_PREFER_PTHREAD_FLAG ON)
-endif()
+
+# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs
+# to have different values for the `Threads::Threads` target. Setting this flag ensures
+# `Threads::Threads` is the same value across all builds so that cache hits occur
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
 
 include(CMakeDependentOption)
 # cmake_dependent_option( RAFT_USE_FAISS_STATIC "Build and statically link the FAISS library for

From a98295b516ef58bc855177077860bab2a2a76d77 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 10 Apr 2023 11:08:35 -0700
Subject: [PATCH 08/78] Remove uses-setup-env-vars (#1406)

This setting now matches the default behavior of the shared-action-workflows repo

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1406
---
 .github/workflows/build.yaml | 2 --
 .github/workflows/pr.yaml    | 2 --
 2 files changed, 4 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 3c8cc4912d..bec89ab888 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -75,7 +75,6 @@ jobs:
       package-name: pylibraft
       package-dir: python/pylibraft
       skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
-      uses-setup-env-vars: false
   wheel-publish-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
@@ -98,7 +97,6 @@ jobs:
       package-name: raft_dask
       package-dir: python/raft-dask
       skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
-      uses-setup-env-vars: false
   wheel-publish-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 23834ab21c..8175b4fbc7 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -73,7 +73,6 @@ jobs:
       package-name: pylibraft
       package-dir: python/pylibraft
       skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
-      uses-setup-env-vars: false
   wheel-tests-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
@@ -96,7 +95,6 @@ jobs:
       package-dir: python/raft-dask
       before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-wheelhouse"
       skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
-      uses-setup-env-vars: false
   wheel-tests-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit

From 35c2f1c95a7da45fa5ef703ab66ef2f89e613e4d Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Thu, 13 Apr 2023 19:15:18 -0400
Subject: [PATCH 09/78] Generate build metrics report for test and benchmarks
 (#1414)

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1414
---
 build.sh                                      | 37 ++++++++++++++-----
 conda/recipes/libraft/build_libraft.sh        |  2 +-
 .../recipes/libraft/build_libraft_nn_bench.sh |  2 +-
 conda/recipes/libraft/build_libraft_tests.sh  |  2 +-
 4 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/build.sh b/build.sh
index 270c75de93..039f0ed6a5 100755
--- a/build.sh
+++ b/build.sh
@@ -18,8 +18,8 @@ ARGS=$*
 # scripts, and that this script resides in the repo dir!
 REPODIR=$(cd $(dirname $0); pwd)
 
-VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall  -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn  --build-metrics --incl-cache-stats --time -h"
-HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<tool>] [--limit-tests=<targets>] [--limit-bench-prims=<targets>] [--limit-bench-ann=<targets>]
+VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall  -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
+HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<tool>] [--limit-tests=<targets>] [--limit-bench-prims=<targets>] [--limit-bench-ann=<targets>] [--build-metrics=<filename>]
  where <target> is:
    clean            - remove all existing build artifacts and configuration (start over)
    libraft          - build the raft C++ code only. Also builds the C-wrapper library
@@ -45,7 +45,7 @@ HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<to
    --allgpuarch                - build for all supported GPU architectures
    --no-nvtx                   - disable nvtx (profiling markers), but allow enabling it in downstream projects
    --show_depr_warn            - show cmake deprecation warnings
-   --build-metrics             - generate build metrics report for libraft
+   --build-metrics             - filename for generating build metrics report for libraft
    --incl-cache-stats          - include cache statistics in build metrics report
    --cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
    --cache-tool=<tool>         - pass the build cache tool (eg: ccache, sccache, distcc) that will be used
@@ -73,7 +73,7 @@ BUILD_PRIMS_BENCH=OFF
 BUILD_ANN_BENCH=OFF
 COMPILE_LIBRARY=OFF
 INSTALL_TARGET=install
-BUILD_REPORT_METRICS=OFF
+BUILD_REPORT_METRICS=""
 BUILD_REPORT_INCL_CACHE_STATS=OFF
 
 TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
@@ -189,6 +189,25 @@ function limitAnnBench {
     fi
 }
 
+function buildMetrics {
+    # Check for multiple build-metrics options
+    if [[ $(echo $ARGS | { grep -Eo "\-\-build\-metrics" || true; } | wc -l ) -gt 1 ]]; then
+        echo "Multiple --build-metrics options were provided, please provide only one: ${ARGS}"
+        exit 1
+    fi
+    # Check for build-metrics option
+    if [[ -n $(echo $ARGS | { grep -E "\-\-build\-metrics" || true; } ) ]]; then
+        # There are possible weird edge cases that may cause this regex filter to output nothing and fail silently
+        # the true pipe will catch any weird edge cases that may happen and will cause the program to fall back
+        # on the invalid option error
+        BUILD_REPORT_METRICS=$(echo $ARGS | sed -e 's/.*--build-metrics=//' -e 's/ .*//')
+        if [[ -n ${BUILD_REPORT_METRICS} ]]; then
+            # Remove the full BUILD_REPORT_METRICS argument from list of args so that it passes validArgs function
+            ARGS=${ARGS//--build-metrics=$BUILD_REPORT_METRICS/}
+        fi
+    fi
+}
+
 if hasArg -h || hasArg --help; then
     echo "${HELP}"
     exit 0
@@ -201,6 +220,7 @@ if (( ${NUMARGS} != 0 )); then
     limitTests
     limitBench
     limitAnnBench
+    buildMetrics
     for a in ${ARGS}; do
         if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then
             echo "Invalid option: ${a}"
@@ -339,9 +359,6 @@ fi
 if hasArg clean; then
     CLEAN=1
 fi
-if hasArg --build-metrics; then
-    BUILD_REPORT_METRICS=ON
-fi
 if hasArg --incl-cache-stats; then
     BUILD_REPORT_INCL_CACHE_STATS=ON
 fi
@@ -422,7 +439,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has
   compile_end=$(date +%s)
   compile_total=$(( compile_end - compile_start ))
 
-  if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIBRAFT_BUILD_DIR}/.ninja_log" ]]; then
+  if [[ -n "$BUILD_REPORT_METRICS" && -f "${LIBRAFT_BUILD_DIR}/.ninja_log" ]]; then
       if ! rapids-build-metrics-reporter.py 2> /dev/null && [ ! -f rapids-build-metrics-reporter.py ]; then
           echo "Downloading rapids-build-metrics-reporter.py"
           curl -sO https://raw.githubusercontent.com/rapidsai/build-metrics-reporter/v1/rapids-build-metrics-reporter.py
@@ -454,13 +471,13 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has
           MSG="${MSG}<br/>libraft.so size: $LIBRAFT_FS"
       fi
       BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIBRAFT_BUILD_DIR}"}
-      echo "The HTML report can be found at [${BMR_DIR}/ninja_log.html]. In CI, this report"
+      echo "The HTML report can be found at [${BMR_DIR}/${BUILD_REPORT_METRICS}.html]. In CI, this report"
       echo "will also be uploaded to the appropriate subdirectory of https://downloads.rapids.ai/ci/raft/, and"
       echo "the entire URL can be found in \"conda-cpp-build\" runs under the task \"Upload additional artifacts\""
       mkdir -p ${BMR_DIR}
       MSG_OUTFILE="$(mktemp)"
       echo "$MSG" > "${MSG_OUTFILE}"
-      PATH=".:$PATH" python rapids-build-metrics-reporter.py ${LIBRAFT_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html
+      PATH=".:$PATH" python rapids-build-metrics-reporter.py ${LIBRAFT_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/${BUILD_REPORT_METRICS}.html
       cp ${LIBRAFT_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
   fi
 fi
diff --git a/conda/recipes/libraft/build_libraft.sh b/conda/recipes/libraft/build_libraft.sh
index 2bf9b428cb..7d4173e8bb 100644
--- a/conda/recipes/libraft/build_libraft.sh
+++ b/conda/recipes/libraft/build_libraft.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 # Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
-./build.sh libraft --allgpuarch --compile-lib --build-metrics --incl-cache-stats --no-nvtx
+./build.sh libraft --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx
diff --git a/conda/recipes/libraft/build_libraft_nn_bench.sh b/conda/recipes/libraft/build_libraft_nn_bench.sh
index dc6250f0f4..00078792a1 100644
--- a/conda/recipes/libraft/build_libraft_nn_bench.sh
+++ b/conda/recipes/libraft/build_libraft_nn_bench.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-./build.sh tests bench-ann --allgpuarch --no-nvtx
+./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats
 cmake --install cpp/build --component ann_bench
diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh
index cc28f93fb8..05a2b59eb0 100644
--- a/conda/recipes/libraft/build_libraft_tests.sh
+++ b/conda/recipes/libraft/build_libraft_tests.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 # Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
-./build.sh tests bench-prims --allgpuarch --no-nvtx
+./build.sh tests bench-prims --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats
 cmake --install cpp/build --component testing

From 7c5b63845b929da5f35fd7711d5547f726ed7dbe Mon Sep 17 00:00:00 2001
From: Micka <mide@nvidia.com>
Date: Fri, 14 Apr 2023 01:16:56 +0200
Subject: [PATCH 10/78] Fix IVF-PQ API to use `device_vector_view` (#1384)

This PR mainly replaces `device_matrix_view` with `device_vector_view` for the `new_indices` argument of `ivf_pq::extend`.
There are also a few updates to the documentation to reflect the current API.
The order of the arguments in the API is not touched.

Authors:
  - Micka (https://github.com/lowener)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Victor Lafargue (https://github.com/viclafargue)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1384
---
 cpp/include/raft/neighbors/ivf_flat.cuh       | 23 ++++----
 cpp/include/raft/neighbors/ivf_pq.cuh         |  8 +--
 .../raft/neighbors/specializations/ivf_pq.cuh | 34 +++++------
 cpp/include/raft_runtime/neighbors/ivf_pq.hpp | 40 ++++++-------
 cpp/src/neighbors/ivfpq_build.cu              | 58 +++++++++----------
 .../ivfpq_extend_float_int64_t.cu             | 22 +++----
 .../ivfpq_extend_int8_t_int64_t.cu            | 22 +++----
 .../ivfpq_extend_uint8_t_int64_t.cu           | 22 +++----
 cpp/test/neighbors/ann_ivf_pq.cuh             |  5 +-
 .../pylibraft/neighbors/ivf_flat/ivf_flat.pyx |  2 +-
 .../neighbors/ivf_pq/cpp/c_ivf_pq.pxd         | 12 ++--
 .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx     | 22 +++++--
 12 files changed, 140 insertions(+), 130 deletions(-)

diff --git a/cpp/include/raft/neighbors/ivf_flat.cuh b/cpp/include/raft/neighbors/ivf_flat.cuh
index c573676504..f12062f851 100644
--- a/cpp/include/raft/neighbors/ivf_flat.cuh
+++ b/cpp/include/raft/neighbors/ivf_flat.cuh
@@ -94,12 +94,11 @@ auto build(raft::device_resources const& handle,
  *   // use default search parameters
  *   ivf_flat::search_params search_params;
  *   // search K nearest neighbours for each of the N queries
- *   ivf_flat::search(handle, index, queries, out_inds, out_dists, search_params, k);
+ *   ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
  * @endcode
  *
  * @tparam value_t data element type
  * @tparam idx_t type of the indices in the source dataset
- * @tparam int_t precision / type of integral arguments
  *
  * @param[in] handle
  * @param[in] params configure the index building
@@ -139,13 +138,11 @@ auto build(raft::device_resources const& handle,
  *   // use default search parameters
  *   ivf_flat::search_params search_params;
  *   // search K nearest neighbours for each of the N queries
- *   ivf_flat::search(handle, index, queries, out_inds, out_dists, search_params, k);
+ *   ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
  * @endcode
  *
  * @tparam value_t data element type
  * @tparam idx_t type of the indices in the source dataset
- * @tparam int_t precision / type of integral arguments
- * @tparam matrix_idx_t matrix indexing type
  *
  * @param[in] handle
  * @param[in] params configure the index building
@@ -232,7 +229,8 @@ auto extend(raft::device_resources const& handle,
  *   // train the index from a [N, D] dataset
  *   auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset);
  *   // fill the index with the data
- *   auto index = ivf_flat::extend(handle, index_empty, dataset);
+ *   std::optional<raft::device_vector_view<const idx_t, idx_t>> no_op = std::nullopt;
+ *   auto index = ivf_flat::extend(handle, index_empty, no_op, dataset);
  * @endcode
  *
  * @tparam value_t data element type
@@ -240,7 +238,7 @@ auto extend(raft::device_resources const& handle,
  *
  * @param[in] handle
  * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, index.dim()]
- * @param[in] new_indices optional raft::device_matrix_view to a vector of indices [n_rows].
+ * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows].
  *    If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt`
  *    here to imply a continuous range `[0...n_rows)`.
  * @param[in] orig_index original index
@@ -314,7 +312,7 @@ void extend(raft::device_resources const& handle,
  *   index_params.add_data_on_build = false;      // don't populate index on build
  *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
  *   // train the index from a [N, D] dataset
- *   auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset);
+ *   auto index_empty = ivf_flat::build(handle, index_params, dataset);
  *   // fill the index with the data
  *   std::optional<raft::device_vector_view<const idx_t, idx_t>> no_op = std::nullopt;
  *   ivf_flat::extend(handle, dataset, no_opt, &index_empty);
@@ -325,7 +323,7 @@ void extend(raft::device_resources const& handle,
  *
  * @param[in] handle
  * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, index.dim()]
- * @param[in] new_indices optional raft::device_matrix_view to a vector of indices [n_rows].
+ * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows].
  *    If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt`
  *    here to imply a continuous range `[0...n_rows)`.
  * @param[inout] index pointer to index, to be overwritten in-place
@@ -422,15 +420,14 @@ void search(raft::device_resources const& handle,
  *   ivf_flat::search_params search_params;
  *   // Use the same allocator across multiple searches to reduce the number of
  *   // cuda memory allocations
- *   ivf_flat::search(handle, index, queries1, out_inds1, out_dists1, search_params, K);
- *   ivf_flat::search(handle, index, queries2, out_inds2, out_dists2, search_params, K);
- *   ivf_flat::search(handle, index, queries3, out_inds3, out_dists3, search_params, K);
+ *   ivf_flat::search(handle, search_params, index, queries1, out_inds1, out_dists1);
+ *   ivf_flat::search(handle, search_params, index, queries2, out_inds2, out_dists2);
+ *   ivf_flat::search(handle, search_params, index, queries3, out_inds3, out_dists3);
  *   ...
  * @endcode
  *
  * @tparam value_t data element type
  * @tparam idx_t type of the indices
- * @tparam int_t precision / type of integral arguments
  *
  * @param[in] handle
  * @param[in] params configure the search
diff --git a/cpp/include/raft/neighbors/ivf_pq.cuh b/cpp/include/raft/neighbors/ivf_pq.cuh
index 4a12ca72a4..934643e0af 100644
--- a/cpp/include/raft/neighbors/ivf_pq.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq.cuh
@@ -69,7 +69,7 @@ index<IdxT> build(raft::device_resources const& handle,
  *
  * @param[in] handle
  * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()]
- * @param[in] new_indices a device matrix view to a vector of indices [n_rows].
+ * @param[in] new_indices a device vector view to a vector of indices [n_rows].
  *    If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
  *    here to imply a continuous range `[0...n_rows)`.
  * @param[inout] idx
@@ -77,7 +77,7 @@ index<IdxT> build(raft::device_resources const& handle,
 template <typename T, typename IdxT>
 index<IdxT> extend(raft::device_resources const& handle,
                    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
-                   std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices,
+                   std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
                    const index<IdxT>& idx)
 {
   ASSERT(new_vectors.extent(1) == idx.dim(),
@@ -104,7 +104,7 @@ index<IdxT> extend(raft::device_resources const& handle,
  *
  * @param[in] handle
  * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()]
- * @param[in] new_indices a device matrix view to a vector of indices [n_rows].
+ * @param[in] new_indices a device vector view to a vector of indices [n_rows].
  *    If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
  *    here to imply a continuous range `[0...n_rows)`.
  * @param[inout] idx
@@ -112,7 +112,7 @@ index<IdxT> extend(raft::device_resources const& handle,
 template <typename T, typename IdxT>
 void extend(raft::device_resources const& handle,
             raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
-            std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices,
+            std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
             index<IdxT>* idx)
 {
   ASSERT(new_vectors.extent(1) == idx->dim(),
diff --git a/cpp/include/raft/neighbors/specializations/ivf_pq.cuh b/cpp/include/raft/neighbors/specializations/ivf_pq.cuh
index 55a7cd5858..9209f5095d 100644
--- a/cpp/include/raft/neighbors/specializations/ivf_pq.cuh
+++ b/cpp/include/raft/neighbors/specializations/ivf_pq.cuh
@@ -35,24 +35,22 @@ namespace raft::neighbors::ivf_pq {
 // We define overloads for build and extend with void return type. This is used in the Cython
 // wrappers, where exception handling is not compatible with return type that has nontrivial
 // constructor.
-#define RAFT_DECL_BUILD_EXTEND(T, IdxT)                                          \
-  extern template auto build(raft::device_resources const&,                      \
-                             const raft::neighbors::ivf_pq::index_params&,       \
-                             raft::device_matrix_view<const T, IdxT, row_major>) \
-    ->raft::neighbors::ivf_pq::index<IdxT>;                                      \
-                                                                                 \
-  extern template auto extend(                                                   \
-    raft::device_resources const&,                                               \
-    raft::device_matrix_view<const T, IdxT, row_major>,                          \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>>,        \
-    const raft::neighbors::ivf_pq::index<IdxT>&)                                 \
-    ->raft::neighbors::ivf_pq::index<IdxT>;                                      \
-                                                                                 \
-  extern template void extend(                                                   \
-    raft::device_resources const&,                                               \
-    raft::device_matrix_view<const T, IdxT, row_major>,                          \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>>,        \
-    raft::neighbors::ivf_pq::index<IdxT>*);
+#define RAFT_DECL_BUILD_EXTEND(T, IdxT)                                                  \
+  extern template auto build(raft::device_resources const&,                              \
+                             const raft::neighbors::ivf_pq::index_params&,               \
+                             raft::device_matrix_view<const T, IdxT, row_major>)         \
+    ->raft::neighbors::ivf_pq::index<IdxT>;                                              \
+                                                                                         \
+  extern template auto extend(raft::device_resources const&,                             \
+                              raft::device_matrix_view<const T, IdxT, row_major>,        \
+                              std::optional<raft::device_vector_view<const IdxT, IdxT>>, \
+                              const raft::neighbors::ivf_pq::index<IdxT>&)               \
+    ->raft::neighbors::ivf_pq::index<IdxT>;                                              \
+                                                                                         \
+  extern template void extend(raft::device_resources const&,                             \
+                              raft::device_matrix_view<const T, IdxT, row_major>,        \
+                              std::optional<raft::device_vector_view<const IdxT, IdxT>>, \
+                              raft::neighbors::ivf_pq::index<IdxT>*);
 
 RAFT_DECL_BUILD_EXTEND(float, int64_t)
 RAFT_DECL_BUILD_EXTEND(int8_t, int64_t)
diff --git a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp
index fb22d7657e..17260b0ded 100644
--- a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp
+++ b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp
@@ -23,26 +23,26 @@ namespace raft::runtime::neighbors::ivf_pq {
 // We define overloads for build and extend with void return type. This is used in the Cython
 // wrappers, where exception handling is not compatible with return type that has nontrivial
 // constructor.
-#define RAFT_DECL_BUILD_EXTEND(T, IdxT)                                                         \
-  [[nodiscard]] raft::neighbors::ivf_pq::index<IdxT> build(                                     \
-    raft::device_resources const& handle,                                                       \
-    const raft::neighbors::ivf_pq::index_params& params,                                        \
-    raft::device_matrix_view<const T, IdxT, row_major> dataset);                                \
-                                                                                                \
-  void build(raft::device_resources const& handle,                                              \
-             const raft::neighbors::ivf_pq::index_params& params,                               \
-             raft::device_matrix_view<const T, IdxT, row_major> dataset,                        \
-             raft::neighbors::ivf_pq::index<IdxT>* idx);                                        \
-                                                                                                \
-  [[nodiscard]] raft::neighbors::ivf_pq::index<IdxT> extend(                                    \
-    raft::device_resources const& handle,                                                       \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                             \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices,           \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx);                                           \
-                                                                                                \
-  void extend(raft::device_resources const& handle,                                             \
-              raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-              std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
+#define RAFT_DECL_BUILD_EXTEND(T, IdxT)                                              \
+  [[nodiscard]] raft::neighbors::ivf_pq::index<IdxT> build(                          \
+    raft::device_resources const& handle,                                            \
+    const raft::neighbors::ivf_pq::index_params& params,                             \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset);                     \
+                                                                                     \
+  void build(raft::device_resources const& handle,                                   \
+             const raft::neighbors::ivf_pq::index_params& params,                    \
+             raft::device_matrix_view<const T, IdxT, row_major> dataset,             \
+             raft::neighbors::ivf_pq::index<IdxT>* idx);                             \
+                                                                                     \
+  [[nodiscard]] raft::neighbors::ivf_pq::index<IdxT> extend(                         \
+    raft::device_resources const& handle,                                            \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                  \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,           \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx);                                \
+                                                                                     \
+  void extend(raft::device_resources const& handle,                                  \
+              raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+              std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
               raft::neighbors::ivf_pq::index<IdxT>* idx);
 
 RAFT_DECL_BUILD_EXTEND(float, int64_t);
diff --git a/cpp/src/neighbors/ivfpq_build.cu b/cpp/src/neighbors/ivfpq_build.cu
index 8759ca2587..7f91e34969 100644
--- a/cpp/src/neighbors/ivfpq_build.cu
+++ b/cpp/src/neighbors/ivfpq_build.cu
@@ -20,35 +20,35 @@
 
 namespace raft::runtime::neighbors::ivf_pq {
 
-#define RAFT_INST_BUILD_EXTEND(T, IdxT)                                                         \
-  raft::neighbors::ivf_pq::index<IdxT> build(                                                   \
-    raft::device_resources const& handle,                                                       \
-    const raft::neighbors::ivf_pq::index_params& params,                                        \
-    raft::device_matrix_view<const T, IdxT, row_major> dataset)                                 \
-  {                                                                                             \
-    return raft::neighbors::ivf_pq::build<T, IdxT>(handle, params, dataset);                    \
-  }                                                                                             \
-  void build(raft::device_resources const& handle,                                              \
-             const raft::neighbors::ivf_pq::index_params& params,                               \
-             raft::device_matrix_view<const T, IdxT, row_major> dataset,                        \
-             raft::neighbors::ivf_pq::index<IdxT>* idx)                                         \
-  {                                                                                             \
-    *idx = raft::neighbors::ivf_pq::build<T, IdxT>(handle, params, dataset);                    \
-  }                                                                                             \
-  raft::neighbors::ivf_pq::index<IdxT> extend(                                                  \
-    raft::device_resources const& handle,                                                       \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                             \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices,           \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx)                                            \
-  {                                                                                             \
-    return raft::neighbors::ivf_pq::extend<T, IdxT>(handle, new_vectors, new_indices, idx);     \
-  }                                                                                             \
-  void extend(raft::device_resources const& handle,                                             \
-              raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-              std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
-              raft::neighbors::ivf_pq::index<IdxT>* idx)                                        \
-  {                                                                                             \
-    raft::neighbors::ivf_pq::extend<T, IdxT>(handle, new_vectors, new_indices, idx);            \
+#define RAFT_INST_BUILD_EXTEND(T, IdxT)                                                     \
+  raft::neighbors::ivf_pq::index<IdxT> build(                                               \
+    raft::device_resources const& handle,                                                   \
+    const raft::neighbors::ivf_pq::index_params& params,                                    \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset)                             \
+  {                                                                                         \
+    return raft::neighbors::ivf_pq::build<T, IdxT>(handle, params, dataset);                \
+  }                                                                                         \
+  void build(raft::device_resources const& handle,                                          \
+             const raft::neighbors::ivf_pq::index_params& params,                           \
+             raft::device_matrix_view<const T, IdxT, row_major> dataset,                    \
+             raft::neighbors::ivf_pq::index<IdxT>* idx)                                     \
+  {                                                                                         \
+    *idx = raft::neighbors::ivf_pq::build<T, IdxT>(handle, params, dataset);                \
+  }                                                                                         \
+  raft::neighbors::ivf_pq::index<IdxT> extend(                                              \
+    raft::device_resources const& handle,                                                   \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                         \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,                  \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx)                                        \
+  {                                                                                         \
+    return raft::neighbors::ivf_pq::extend<T, IdxT>(handle, new_vectors, new_indices, idx); \
+  }                                                                                         \
+  void extend(raft::device_resources const& handle,                                         \
+              raft::device_matrix_view<const T, IdxT, row_major> new_vectors,               \
+              std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,        \
+              raft::neighbors::ivf_pq::index<IdxT>* idx)                                    \
+  {                                                                                         \
+    raft::neighbors::ivf_pq::extend<T, IdxT>(handle, new_vectors, new_indices, idx);        \
   }
 
 RAFT_INST_BUILD_EXTEND(float, int64_t);
diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
index 4cc616f32d..584bbfc45c 100644
--- a/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
+++ b/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
@@ -19,17 +19,17 @@
 
 namespace raft::neighbors::ivf_pq {
 
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                   \
-  template auto extend<T, IdxT>(                                                      \
-    raft::device_resources const& handle,                                             \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
-    const index<IdxT>& idx)                                                           \
-    ->index<IdxT>;                                                                    \
-  template void extend<T, IdxT>(                                                      \
-    raft::device_resources const& handle,                                             \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
+#define RAFT_MAKE_INSTANCE(T, IdxT)                                        \
+  template auto extend<T, IdxT>(                                           \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    const index<IdxT>& idx)                                                \
+    ->index<IdxT>;                                                         \
+  template void extend<T, IdxT>(                                           \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     index<IdxT>* idx);
 
 RAFT_MAKE_INSTANCE(float, int64_t);
diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
index a3117aae0f..00311a77e4 100644
--- a/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
@@ -19,17 +19,17 @@
 
 namespace raft::neighbors::ivf_pq {
 
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                   \
-  template auto extend<T, IdxT>(                                                      \
-    raft::device_resources const& handle,                                             \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
-    const index<IdxT>& idx)                                                           \
-    ->index<IdxT>;                                                                    \
-  template void extend<T, IdxT>(                                                      \
-    raft::device_resources const& handle,                                             \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
+#define RAFT_MAKE_INSTANCE(T, IdxT)                                        \
+  template auto extend<T, IdxT>(                                           \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    const index<IdxT>& idx)                                                \
+    ->index<IdxT>;                                                         \
+  template void extend<T, IdxT>(                                           \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     index<IdxT>* idx);
 
 RAFT_MAKE_INSTANCE(int8_t, int64_t);
diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
index a5e3d68569..11524886f0 100644
--- a/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
@@ -19,17 +19,17 @@
 
 namespace raft::neighbors::ivf_pq {
 
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                   \
-  template auto extend<T, IdxT>(                                                      \
-    raft::device_resources const& handle,                                             \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
-    const index<IdxT>& idx)                                                           \
-    ->index<IdxT>;                                                                    \
-  template void extend<T, IdxT>(                                                      \
-    raft::device_resources const& handle,                                             \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                   \
-    std::optional<raft::device_matrix_view<const IdxT, IdxT, row_major>> new_indices, \
+#define RAFT_MAKE_INSTANCE(T, IdxT)                                        \
+  template auto extend<T, IdxT>(                                           \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    const index<IdxT>& idx)                                                \
+    ->index<IdxT>;                                                         \
+  template void extend<T, IdxT>(                                           \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     index<IdxT>* idx);
 
 RAFT_MAKE_INSTANCE(uint8_t, int64_t);
diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh
index c331081314..c69829821a 100644
--- a/cpp/test/neighbors/ann_ivf_pq.cuh
+++ b/cpp/test/neighbors/ann_ivf_pq.cuh
@@ -210,13 +210,12 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
     auto idx = ivf_pq::build<DataT, IdxT>(handle_, ipams, database_view);
 
     auto vecs_2_view = raft::make_device_matrix_view<DataT, IdxT>(vecs_2, size_2, ps.dim);
-    auto inds_2_view = raft::make_device_matrix_view<IdxT, IdxT>(inds_2, size_2, 1);
+    auto inds_2_view = raft::make_device_vector_view<IdxT, IdxT>(inds_2, size_2);
     ivf_pq::extend<DataT, IdxT>(handle_, vecs_2_view, inds_2_view, &idx);
 
     auto vecs_1_view =
       raft::make_device_matrix_view<DataT, IdxT, row_major>(vecs_1, size_1, ps.dim);
-    auto inds_1_view =
-      raft::make_device_matrix_view<const IdxT, IdxT, row_major>(inds_1, size_1, 1);
+    auto inds_1_view = raft::make_device_vector_view<const IdxT, IdxT>(inds_1, size_1);
     ivf_pq::extend<DataT, IdxT>(handle_, vecs_1_view, inds_1_view, &idx);
     return idx;
   }
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx
index db279ad2db..352376fe17 100644
--- a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx
+++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx
@@ -427,7 +427,7 @@ def extend(Index index, new_vectors, new_indices, handle=None):
         Trained ivf_flat object.
     new_vectors : CUDA array interface compliant matrix shape (n_samples, dim)
         Supported dtype [float, int8, uint8]
-    new_indices : CUDA array interface compliant matrix shape (n_samples, dim)
+    new_indices : CUDA array interface compliant vector shape (n_samples,)
         Supported dtype [int64]
     {handle_docstring}
 
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd
index d04d833f3b..531c2428e9 100644
--- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd
+++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd
@@ -29,7 +29,11 @@ from libcpp.string cimport string
 
 from rmm._lib.memory_resource cimport device_memory_resource
 
-from pylibraft.common.cpp.mdspan cimport device_matrix_view, row_major
+from pylibraft.common.cpp.mdspan cimport (
+    device_matrix_view,
+    device_vector_view,
+    row_major,
+)
 from pylibraft.common.handle cimport device_resources
 from pylibraft.common.optional cimport optional
 from pylibraft.distance.distance_type cimport DistanceType
@@ -126,19 +130,19 @@ cdef extern from "raft_runtime/neighbors/ivf_pq.hpp" \
     cdef void extend(
         const device_resources& handle,
         device_matrix_view[float, int64_t, row_major] new_vectors,
-        optional[device_matrix_view[int64_t, int64_t, row_major]] new_indices,
+        optional[device_vector_view[int64_t, int64_t]] new_indices,
         index[int64_t]* index) except +
 
     cdef void extend(
         const device_resources& handle,
         device_matrix_view[int8_t, int64_t, row_major] new_vectors,
-        optional[device_matrix_view[int64_t, int64_t, row_major]] new_indices,
+        optional[device_vector_view[int64_t, int64_t]] new_indices,
         index[int64_t]* index) except +
 
     cdef void extend(
         const device_resources& handle,
         device_matrix_view[uint8_t, int64_t, row_major] new_vectors,
-        optional[device_matrix_view[int64_t, int64_t, row_major]] new_indices,
+        optional[device_vector_view[int64_t, int64_t]] new_indices,
         index[int64_t]* index) except +
 
     cdef void search(
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx
index 1906c569f6..b89e5dd44d 100644
--- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx
+++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx
@@ -51,10 +51,16 @@ from rmm._lib.memory_resource cimport (
 
 cimport pylibraft.neighbors.ivf_flat.cpp.c_ivf_flat as c_ivf_flat
 cimport pylibraft.neighbors.ivf_pq.cpp.c_ivf_pq as c_ivf_pq
+from pylibraft.common.optional cimport make_optional, optional
 
 from pylibraft.neighbors.common import _check_input_array, _get_metric
 
-from pylibraft.common.cpp.mdspan cimport device_matrix_view, row_major
+from pylibraft.common.cpp.mdspan cimport (
+    device_matrix_view,
+    device_vector_view,
+    make_device_vector_view,
+    row_major,
+)
 from pylibraft.common.mdspan cimport (
     get_dmv_float,
     get_dmv_int8,
@@ -416,7 +422,7 @@ def extend(Index index, new_vectors, new_indices, handle=None):
         Trained ivf_pq object.
     new_vectors : array interface compliant matrix shape (n_samples, dim)
         Supported dtype [float, int8, uint8]
-    new_indices : array interface compliant matrix shape (n_samples, dim)
+    new_indices : array interface compliant vector shape (n_samples,)
         Supported dtype [int64]
     {handle_docstring}
 
@@ -472,6 +478,7 @@ def extend(Index index, new_vectors, new_indices, handle=None):
 
     vecs_cai = wrap_array(new_vectors)
     vecs_dt = vecs_cai.dtype
+    cdef optional[device_vector_view[int64_t, int64_t]] new_indices_opt
     cdef int64_t n_rows = vecs_cai.shape[0]
     cdef uint32_t dim = vecs_cai.shape[1]
 
@@ -484,23 +491,28 @@ def extend(Index index, new_vectors, new_indices, handle=None):
     if len(idx_cai.shape)!=1:
         raise ValueError("Indices array is expected to be 1D")
 
+    if index.index.size() > 0:
+        new_indices_opt = make_device_vector_view(
+            <int64_t *><uintptr_t>idx_cai.data,
+            <int64_t>idx_cai.shape[0])
+
     if vecs_dt == np.float32:
         with cuda_interruptible():
             c_ivf_pq.extend(deref(handle_),
                             get_dmv_float(vecs_cai, check_shape=True),
-                            make_optional_view_int64(get_dmv_int64(idx_cai, check_shape=False)),  # noqa: E501
+                            new_indices_opt,
                             index.index)
     elif vecs_dt == np.int8:
         with cuda_interruptible():
             c_ivf_pq.extend(deref(handle_),
                             get_dmv_int8(vecs_cai, check_shape=True),
-                            make_optional_view_int64(get_dmv_int64(idx_cai, check_shape=False)),  # noqa: E501
+                            new_indices_opt,
                             index.index)
     elif vecs_dt == np.uint8:
         with cuda_interruptible():
             c_ivf_pq.extend(deref(handle_),
                             get_dmv_uint8(vecs_cai, check_shape=True),
-                            make_optional_view_int64(get_dmv_int64(idx_cai, check_shape=False)),  # noqa: E501
+                            new_indices_opt,
                             index.index)
     else:
         raise TypeError("query dtype %s not supported" % vecs_dt)

From c950854af7cb21e63cf6d161d4d04970a7ebef3b Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Thu, 13 Apr 2023 19:44:31 -0400
Subject: [PATCH 11/78] Adding base header-only conda package without cuda math
 libs (#1386)

cc @MatthiasKohl @bdice

Making sure CI agrees with this change. @MatthiasKohl, if CI succeeds here, let's try plugging the resulting conda packages into a cugraph-ops PR to make sure cugraph-ops CI is happy as well.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/raft/pull/1386
---
 build.sh                                      |  2 +-
 conda/recipes/libraft/meta.yaml               | 51 ++++++---------
 cpp/CMakeLists.txt                            | 63 ++++++++++++++-----
 cpp/bench/ann/CMakeLists.txt                  |  1 +
 .../raft/linalg/detail/cublas_wrappers.hpp    |  8 ++-
 .../detail/modularity_maximization.hpp        | 18 ------
 cpp/test/CMakeLists.txt                       |  3 +-
 docs/source/build.md                          | 24 ++++---
 8 files changed, 89 insertions(+), 81 deletions(-)

diff --git a/build.sh b/build.sh
index 039f0ed6a5..ab904abdad 100755
--- a/build.sh
+++ b/build.sh
@@ -522,7 +522,7 @@ fi
 # Initiate build for example RAFT application template (if needed)
 
 if hasArg template; then
-    pushd cpp/template
+    pushd ${REPODIR}/cpp/template
     ./build.sh
     popd
 fi
diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml
index ccd7314484..8ec9cc10c6 100644
--- a/conda/recipes/libraft/meta.yaml
+++ b/conda/recipes/libraft/meta.yaml
@@ -16,7 +16,7 @@ source:
   git_url: ../../..
 
 outputs:
-  - name: libraft-headers
+  - name: libraft-headers-only
     version: {{ version }}
     script: build_libraft_headers.sh
     build:
@@ -50,20 +50,26 @@ outputs:
         - ninja
         - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
-        - cuda-profiler-api {{ cuda_profiler_api_host_version }}
-        - cudatoolkit ={{ cuda_version }}
-        - libcublas {{ libcublas_host_version }}
-        - libcublas-dev {{ libcublas_host_version }}
-        - libcurand {{ libcurand_host_version }}
-        - libcurand-dev {{ libcurand_host_version }}
-        - libcusolver {{ libcusolver_host_version }}
-        - libcusolver-dev {{ libcusolver_host_version }}
-        - libcusparse {{ libcusparse_host_version }}
-        - libcusparse-dev {{ libcusparse_host_version }}
         - librmm ={{ minor_version }}
+        - cudatoolkit {{ cuda_version }}
+    about:
+      home: https://rapids.ai/
+      license: Apache-2.0
+      summary: libraft-headers-only library
+  - name: libraft-headers
+    version: {{ version }}
+    build:
+      number: {{ GIT_DESCRIBE_NUMBER }}
+      string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
+        - librmm
+    requirements:
       run:
-        - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
+        - {{ pin_subpackage('libraft-headers-only', exact=True) }}
         - cuda-profiler-api {{ cuda_profiler_api_run_version }}
+        - cudatoolkit {{ cuda_version }}
+        - librmm ={{ minor_version }}
         - libcublas {{ libcublas_run_version }}
         - libcublas-dev {{ libcublas_run_version }}
         - libcurand {{ libcurand_run_version }}
@@ -72,7 +78,6 @@ outputs:
         - libcusolver-dev {{ libcusolver_run_version }}
         - libcusparse {{ libcusparse_run_version }}
         - libcusparse-dev {{ libcusparse_run_version }}
-        - librmm ={{ minor_version }}
     about:
       home: https://rapids.ai/
       license: Apache-2.0
@@ -130,7 +135,6 @@ outputs:
         - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libraft', exact=True) }}
-        - {{ pin_subpackage('libraft-headers', exact=True) }}
         - cuda-profiler-api {{ cuda_profiler_api_host_version }}
         - gmock {{ gtest_version }}
         - gtest {{ gtest_version }}
@@ -144,7 +148,6 @@ outputs:
         - libcusparse-dev {{ libcusparse_host_version }}
       run:
         - {{ pin_subpackage('libraft', exact=True) }}
-        - {{ pin_subpackage('libraft-headers', exact=True) }}
         - gmock {{ gtest_version }}
         - gtest {{ gtest_version }}
     about:
@@ -170,19 +173,10 @@ outputs:
         - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libraft', exact=True) }}
-        - {{ pin_subpackage('libraft-headers', exact=True) }}
-        - cuda-profiler-api {{ cuda_profiler_api_host_version }}
         - libcublas {{ libcublas_host_version }}
         - libcublas-dev {{ libcublas_host_version }}
-        - libcurand {{ libcurand_host_version }}
-        - libcurand-dev {{ libcurand_host_version }}
-        - libcusolver {{ libcusolver_host_version }}
-        - libcusolver-dev {{ libcusolver_host_version }}
-        - libcusparse {{ libcusparse_host_version }}
-        - libcusparse-dev {{ libcusparse_host_version }}
       run:
         - {{ pin_subpackage('libraft', exact=True) }}
-        - {{ pin_subpackage('libraft-headers', exact=True) }}
     about:
       home: https://rapids.ai/
       license: Apache-2.0
@@ -206,23 +200,14 @@ outputs:
         - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libraft', exact=True) }}
-        - {{ pin_subpackage('libraft-headers', exact=True) }}
-        - cuda-profiler-api {{ cuda_profiler_api_host_version }}
         - libcublas {{ libcublas_host_version }}
         - libcublas-dev {{ libcublas_host_version }}
-        - libcurand {{ libcurand_host_version }}
-        - libcurand-dev {{ libcurand_host_version }}
-        - libcusolver {{ libcusolver_host_version }}
-        - libcusolver-dev {{ libcusolver_host_version }}
-        - libcusparse {{ libcusparse_host_version }}
-        - libcusparse-dev {{ libcusparse_host_version }}
         - glog {{ glog_version }}
         - nlohmann_json {{ nlohmann_json_version }}
         - libfaiss>=1.7.1
         - faiss-proc=*=cuda
       run:
         - {{ pin_subpackage('libraft', exact=True) }}
-        - {{ pin_subpackage('libraft-headers', exact=True) }}
         - glog {{ glog_version }}
         - faiss-proc=*=cuda
         - libfaiss {{ faiss_version }}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 144f58c4d6..6461492169 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -179,17 +179,7 @@ target_include_directories(
 )
 
 # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target.
-target_link_libraries(
-  raft
-  INTERFACE rmm::rmm
-            cuco::cuco
-            nvidia::cutlass::cutlass
-            CUDA::cublas${_ctk_static_suffix}
-            CUDA::curand${_ctk_static_suffix}
-            CUDA::cusolver${_ctk_static_suffix}
-            CUDA::cusparse${_ctk_static_suffix}
-            raft::Thrust
-)
+target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust)
 
 target_compile_features(raft INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
 target_compile_options(
@@ -197,6 +187,15 @@ target_compile_options(
                  --expt-relaxed-constexpr>
 )
 
+set(RAFT_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix})
+set(RAFT_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix})
+set(RAFT_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix})
+set(RAFT_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix})
+
+set(RAFT_CTK_MATH_DEPENDENCIES ${RAFT_CUBLAS_DEPENDENCY} ${RAFT_CUSOLVER_DEPENDENCY}
+                               ${RAFT_CUSPARSE_DEPENDENCY} ${RAFT_CURAND_DEPENDENCY}
+)
+
 # Endian detection
 include(TestBigEndian)
 test_big_endian(BIG_ENDIAN)
@@ -454,7 +453,13 @@ if(RAFT_COMPILE_LIBRARY)
                INTERFACE_POSITION_INDEPENDENT_CODE ON
   )
 
-  target_link_libraries(raft_lib PUBLIC raft::raft $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>)
+  target_link_libraries(
+    raft_lib
+    PUBLIC raft::raft
+           ${RAFT_CTK_MATH_DEPENDENCIES} # TODO: Once `raft::resources` is used everywhere, this
+                                         # will just be cublas
+           $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
+  )
   target_compile_options(
     raft_lib PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAFT_CXX_FLAGS}>"
                      "$<$<COMPILE_LANGUAGE:CUDA>:${RAFT_CUDA_FLAGS}>"
@@ -606,13 +611,39 @@ if(TARGET raft_lib)
   list(APPEND raft_export_sets raft-compiled-lib-exports)
 endif()
 
+string(
+  APPEND
+  code_string
+  [=[
+ option(RAFT_ENABLE_CUSOLVER_DEPENDENCY "Enable cusolver dependency" ON)
+ option(RAFT_ENABLE_CUBLAS_DEPENDENCY "Enable cublas dependency" ON)
+ option(RAFT_ENABLE_CURAND_DEPENDENCY "Enable curand dependency" ON)
+ option(RAFT_ENABLE_CUSPARSE_DEPENDENCY "Enable cusparse dependency" ON)
+
+mark_as_advanced(RAFT_ENABLE_CUSOLVER_DEPENDENCY)
+mark_as_advanced(RAFT_ENABLE_CUBLAS_DEPENDENCY)
+mark_as_advanced(RAFT_ENABLE_CURAND_DEPENDENCY)
+mark_as_advanced(RAFT_ENABLE_CUSPARSE_DEPENDENCY)
+
+target_link_libraries(raft::raft INTERFACE
+  $<$<BOOL:${RAFT_ENABLE_CUSOLVER_DEPENDENCY}>:${RAFT_CUSOLVER_DEPENDENCY}>
+  $<$<BOOL:${RAFT_ENABLE_CUBLAS_DEPENDENCY}>:${RAFT_CUBLAS_DEPENDENCY}>
+  $<$<BOOL:${RAFT_ENABLE_CUSPARSE_DEPENDENCY}>:${RAFT_CUSPARSE_DEPENDENCY}>
+  $<$<BOOL:${RAFT_ENABLE_CURAND_DEPENDENCY}>:${RAFT_CURAND_DEPENDENCY}>
+)
+]=]
+)
+
+# Use `rapids_export` for 22.04 as it will have COMPONENT support
 rapids_export(
   INSTALL raft
   EXPORT_SET raft-exports
   COMPONENTS ${raft_components}
   COMPONENTS_EXPORT_SET ${raft_export_sets}
   GLOBAL_TARGETS raft compiled distributed
-  NAMESPACE raft:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string
+  NAMESPACE raft::
+  DOCUMENTATION doc_string
+  FINAL_CODE_BLOCK code_string
 )
 
 # ##################################################################################################
@@ -622,8 +653,10 @@ rapids_export(
   EXPORT_SET raft-exports
   COMPONENTS ${raft_components}
   COMPONENTS_EXPORT_SET ${raft_export_sets}
-  GLOBAL_TARGETS raft
-  compiled distributed DOCUMENTATION doc_string NAMESPACE raft:: FINAL_CODE_BLOCK code_string
+  GLOBAL_TARGETS raft compiled distributed
+  DOCUMENTATION doc_string
+  NAMESPACE raft::
+  FINAL_CODE_BLOCK code_string
 )
 
 # ##################################################################################################
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index 6267be518e..a14018a15d 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -80,6 +80,7 @@ function(ConfigureAnnBench)
             $<$<BOOL:${RAFT_ANN_BENCH_USE_MULTIGPU}>:NCCL::NCCL>
             ${ConfigureAnnBench_LINKS}
             Threads::Threads
+            ${RAFT_CTK_MATH_DEPENDENCIES}
             $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
             $<TARGET_NAME_IF_EXISTS:conda_env>
   )
diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
index 03975b1b7d..87a195757c 100644
--- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
+++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -965,7 +965,8 @@ inline cublasStatus_t cublasdot(cublasHandle_t handle,
                                 cudaStream_t stream)
 {
   RAFT_CUBLAS_TRY(cublasSetStream(handle, stream));
-  return cublasSdot(handle, n, x, incx, y, incy, result);
+  return cublasDotEx(
+    handle, n, x, CUDA_R_32F, incx, y, CUDA_R_32F, incy, result, CUDA_R_32F, CUDA_R_32F);
 }
 
 template <>
@@ -979,7 +980,8 @@ inline cublasStatus_t cublasdot(cublasHandle_t handle,
                                 cudaStream_t stream)
 {
   RAFT_CUBLAS_TRY(cublasSetStream(handle, stream));
-  return cublasDdot(handle, n, x, incx, y, incy, result);
+  return cublasDotEx(
+    handle, n, x, CUDA_R_64F, incx, y, CUDA_R_64F, incy, result, CUDA_R_64F, CUDA_R_64F);
 }
 /** @} */
 
diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp
index 160664bae8..d81c64b257 100644
--- a/cpp/include/raft/spectral/detail/modularity_maximization.hpp
+++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp
@@ -32,24 +32,6 @@
 #include <raft/spectral/eigen_solvers.cuh>
 #include <raft/spectral/matrix_wrappers.hpp>
 
-#ifdef COLLECT_TIME_STATISTICS
-#include <cuda_profiler_api.h>
-#include <stddef.h>
-#include <sys/resource.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-#endif
-
-#ifdef COLLECT_TIME_STATISTICS
-static double timer(void)
-{
-  struct timeval tv;
-  cudaDeviceSynchronize();
-  gettimeofday(&tv, NULL);
-  return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
-}
-#endif
-
 namespace raft {
 namespace spectral {
 namespace detail {
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 9109d84fe4..22e8a9d73c 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -31,12 +31,13 @@ function(ConfigureTest)
 
   target_link_libraries(
     ${TEST_NAME}
-    PRIVATE raft::raft
+    PRIVATE raft
             raft_internal
             $<$<BOOL:${ConfigureTest_LIB}>:raft::compiled>
             GTest::gtest
             GTest::gtest_main
             Threads::Threads
+            ${RAFT_CTK_MATH_DEPENDENCIES}
             $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
             $<TARGET_NAME_IF_EXISTS:conda_env>
   )
diff --git a/docs/source/build.md b/docs/source/build.md
index d7550eb631..262c5703bc 100644
--- a/docs/source/build.md
+++ b/docs/source/build.md
@@ -151,18 +151,22 @@ make -j<parallel_level> install
 
 RAFT's cmake has the following configurable flags available:.
 
-| Flag                      | Possible Values      | Default Value | Behavior |
-|---------------------------|----------------------| --- | --- |
-| BUILD_TESTS               | ON, OFF              | ON | Compile Googletests |
-| BUILD_PRIMS_BENCH               | ON, OFF              | OFF | Compile benchmarks |
+| Flag                            | Possible Values      | Default Value | Behavior                                                                     |
+|---------------------------------|----------------------| --- |------------------------------------------------------------------------------|
+| BUILD_TESTS                     | ON, OFF              | ON | Compile Googletests                                                          |
+| BUILD_PRIMS_BENCH                     | ON, OFF              | OFF | Compile benchmarks                                                           |
 | BUILD_ANN_BENCH               | ON, OFF              | OFF | Compile end-to-end ANN benchmarks |
-| raft_FIND_COMPONENTS      | compiled distributed | | Configures the optional components as a space-separated list |
 | RAFT_COMPILE_LIBRARY      | ON, OFF              | ON if either BUILD_TESTS or BUILD_PRIMS_BENCH is ON; otherwise OFF | Compiles all `libraft` shared libraries (these are required for Googletests) |
-| DETECT_CONDA_ENV          | ON, OFF              | ON | Enable detection of conda environment for dependencies |
-| RAFT_NVTX                 | ON, OFF              | OFF | Enable NVTX Markers |
-| CUDA_ENABLE_KERNELINFO    | ON, OFF              | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer` |
-| CUDA_ENABLE_LINEINFO      | ON, OFF              | OFF | Enable the -lineinfo option for nvcc |
-| CUDA_STATIC_RUNTIME       | ON, OFF              | OFF | Statically link the CUDA runtime |
+| raft_FIND_COMPONENTS            | compiled distributed | | Configures the optional components as a space-separated list                 |
+| RAFT_ENABLE_CUBLAS_DEPENDENCY   | ON, OFF | ON | Link against cublas library in `raft::raft`                                  | 
+| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against cusolver library in `raft::raft`                                | 
+| RAFT_ENABLE_CUSPARSE_DEPENDENCY | ON, OFF | ON | Link against cusparse library in `raft::raft`                                | 
+| RAFT_ENABLE_CURAND_DEPENDENCY   | ON, OFF | ON | Link against curand library in `raft::raft`                                  |
+| DETECT_CONDA_ENV                | ON, OFF              | ON | Enable detection of conda environment for dependencies                       |
+| RAFT_NVTX                       | ON, OFF              | OFF | Enable NVTX Markers                                                          |
+| CUDA_ENABLE_KERNELINFO          | ON, OFF              | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer`         |
+| CUDA_ENABLE_LINEINFO            | ON, OFF              | OFF | Enable the -lineinfo option for nvcc                                         |
+| CUDA_STATIC_RUNTIME             | ON, OFF              | OFF | Statically link the CUDA runtime                                             |
 
 Currently, shared libraries are provided for the `libraft-nn` and `libraft-distance` components.
 

From bd69713f84e34839881de00bfc61a7f32504dc05 Mon Sep 17 00:00:00 2001
From: Jordan Jacobelli <jjacobelli@nvidia.com>
Date: Mon, 17 Apr 2023 19:22:12 +0200
Subject: [PATCH 12/78] Use ARC V2 self-hosted runners for GPU jobs (#1410)

This PR is updating the runner labels to use ARC V2 self-hosted runners for GPU jobs. This is needed to resolve the auto-scaling issues.

Authors:
  - Jordan Jacobelli (https://github.com/jjacobelli)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1410
---
 .github/workflows/build.yaml | 2 +-
 .github/workflows/pr.yaml    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index bec89ab888..0f5f84c158 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -60,7 +60,7 @@ jobs:
     uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
       build_type: branch
-      node_type: "gpu-latest-1"
+      node_type: "gpu-v100-latest-1"
       arch: "amd64"
       container_image: "rapidsai/ci:latest"
       run_script: "ci/build_docs.sh"
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 8175b4fbc7..fc8c8d516e 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -60,7 +60,7 @@ jobs:
     uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
       build_type: pull-request
-      node_type: "gpu-latest-1"
+      node_type: "gpu-v100-latest-1"
       arch: "amd64"
       container_image: "rapidsai/ci:latest"
       run_script: "ci/build_docs.sh"

From c7a72bea63b4c57c40cc545ce95fb9c0252d1995 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 17 Apr 2023 11:34:47 -0700
Subject: [PATCH 13/78] Fix is_min_close (#1419)

Correlation and Cosine distance both return (1 - similarity) in the pairwise distances apis, meaning that is_min_close is returning the wrong sort order for them. Fix.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1419
---
 cpp/include/raft/distance/distance_types.hpp     | 2 --
 cpp/include/raft/sparse/neighbors/detail/knn.cuh | 5 ++---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/cpp/include/raft/distance/distance_types.hpp b/cpp/include/raft/distance/distance_types.hpp
index 4060147f1d..d17ef358ee 100644
--- a/cpp/include/raft/distance/distance_types.hpp
+++ b/cpp/include/raft/distance/distance_types.hpp
@@ -74,8 +74,6 @@ inline bool is_min_close(DistanceType metric)
   bool select_min;
   switch (metric) {
     case DistanceType::InnerProduct:
-    case DistanceType::CosineExpanded:
-    case DistanceType::CorrelationExpanded:
       // Similarity metrics have the opposite meaning, i.e. nearest neighbors are those with larger
       // similarity (See the same logic at cpp/include/raft/sparse/spatial/detail/knn.cuh:362
       // {perform_k_selection})
diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
index 7bedec9830..f9f07c13ca 100644
--- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh
+++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
@@ -355,8 +355,7 @@ class sparse_knn_t {
     // want to adjust k.
     value_idx n_neighbors = std::min(static_cast<value_idx>(k), batch_cols);
 
-    bool ascending = true;
-    if (metric == raft::distance::DistanceType::InnerProduct) ascending = false;
+    bool ascending = raft::distance::is_min_close(metric);
 
     // kernel to slice first (min) k cols and copy into batched merge buffer
     raft::spatial::knn::select_k(batch_dists,
@@ -425,4 +424,4 @@ class sparse_knn_t {
   raft::device_resources const& handle;
 };
 
-};  // namespace raft::sparse::neighbors::detail
\ No newline at end of file
+};  // namespace raft::sparse::neighbors::detail

From ba207a05d1b4ce35338ca5a7c395d8773d98ca89 Mon Sep 17 00:00:00 2001
From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com>
Date: Mon, 17 Apr 2023 22:06:33 +0200
Subject: [PATCH 14/78] IVF-PQ: manipulating individual lists (#1298)

Add public functions for reading and writing into individual ivf-pq lists (clusters), in the input space (reconstructed data) and in flat PQ codes.

Partially solves (IVF-PQ) https://github.com/rapidsai/raft/issues/1205

Authors:
  - Artem M. Chirkin (https://github.com/achirkin)
  - Corey J. Nolet (https://github.com/cjnolet)
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1298
---
 .../raft/neighbors/detail/ivf_pq_build.cuh    | 731 ++++++++++++++----
 .../neighbors/detail/ivf_pq_codepacking.cuh   | 214 +++++
 cpp/include/raft/neighbors/ivf_pq.cuh         |   2 +-
 cpp/include/raft/neighbors/ivf_pq_helpers.cuh | 409 ++++++++++
 cpp/test/neighbors/ann_ivf_pq.cuh             | 199 ++++-
 5 files changed, 1390 insertions(+), 165 deletions(-)
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh
 create mode 100644 cpp/include/raft/neighbors/ivf_pq_helpers.cuh

diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
index 1a563d213e..36ceccc36f 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
@@ -18,6 +18,7 @@
 
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
+#include <raft/neighbors/detail/ivf_pq_codepacking.cuh>
 #include <raft/neighbors/ivf_list.hpp>
 #include <raft/neighbors/ivf_pq_types.hpp>
 
@@ -60,63 +61,6 @@ namespace raft::neighbors::ivf_pq::detail {
 
 using namespace raft::spatial::knn::detail;  // NOLINT
 
-/** A chunk of PQ-encoded vector managed by one CUDA thread. */
-using pq_vec_t = TxN_t<uint8_t, kIndexGroupVecLen>::io_t;
-
-namespace {
-
-/**
- * This type mimics the `uint8_t&` for the indexing operator of `bitfield_view_t`.
- *
- * @tparam Bits number of bits comprising the value.
- */
-template <uint32_t Bits>
-struct bitfield_ref_t {
-  static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte");
-  constexpr static uint8_t kMask = static_cast<uint8_t>((1u << Bits) - 1u);
-  uint8_t* ptr;
-  uint32_t offset;
-
-  constexpr operator uint8_t()  // NOLINT
-  {
-    auto pair = static_cast<uint16_t>(ptr[0]);
-    if (offset + Bits > 8) { pair |= static_cast<uint16_t>(ptr[1]) << 8; }
-    return static_cast<uint8_t>((pair >> offset) & kMask);
-  }
-
-  constexpr auto operator=(uint8_t code) -> bitfield_ref_t&
-  {
-    if (offset + Bits > 8) {
-      auto pair = static_cast<uint16_t>(ptr[0]);
-      pair |= static_cast<uint16_t>(ptr[1]) << 8;
-      pair &= ~(static_cast<uint16_t>(kMask) << offset);
-      pair |= static_cast<uint16_t>(code) << offset;
-      ptr[0] = static_cast<uint8_t>(Pow2<256>::mod(pair));
-      ptr[1] = static_cast<uint8_t>(Pow2<256>::div(pair));
-    } else {
-      ptr[0] = (ptr[0] & ~(kMask << offset)) | (code << offset);
-    }
-    return *this;
-  }
-};
-
-/**
- * View a byte array as an array of unsigned integers of custom small bit size.
- *
- * @tparam Bits number of bits comprising a single element of the array.
- */
-template <uint32_t Bits>
-struct bitfield_view_t {
-  static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte");
-  uint8_t* raw;
-
-  constexpr auto operator[](uint32_t i) -> bitfield_ref_t<Bits>
-  {
-    uint32_t bit_offset = i * Bits;
-    return bitfield_ref_t<Bits>{raw + Pow2<8>::div(bit_offset), Pow2<8>::mod(bit_offset)};
-  }
-};
-
 template <uint32_t BlockDim, typename T, typename S>
 __launch_bounds__(BlockDim) __global__ void copy_warped_kernel(
   T* out, uint32_t ld_out, const S* in, uint32_t ld_in, uint32_t n_cols, size_t n_rows)
@@ -162,8 +106,6 @@ void copy_warped(T* out,
     <<<blocks, threads, 0, stream>>>(out, ld_out, in, ld_in, n_cols, n_rows);
 }
 
-}  // namespace
-
 /**
  * @brief Fill-in a random orthogonal transformation matrix.
  *
@@ -276,7 +218,7 @@ void flat_compute_residuals(
   device_matrix_view<const float, uint32_t, row_major> rotation_matrix,  // [rot_dim, dim]
   device_matrix_view<const float, uint32_t, row_major> centers,          // [n_lists, dim_ext]
   const T* dataset,                                                      // [n_rows, dim]
-  const uint32_t* labels,                                                // [n_rows]
+  std::variant<uint32_t, const uint32_t*> labels,                        // [n_rows]
   rmm::mr::device_memory_resource* device_memory)
 {
   auto stream  = handle.get_stream();
@@ -287,7 +229,9 @@ void flat_compute_residuals(
   linalg::map_offset(handle, tmp_view, [centers, dataset, labels, dim] __device__(size_t i) {
     auto row_ix = i / dim;
     auto el_ix  = i % dim;
-    auto label  = labels[row_ix];
+    auto label  = std::holds_alternative<uint32_t>(labels)
+                    ? std::get<uint32_t>(labels)
+                    : std::get<const uint32_t*>(labels)[row_ix];
     return utils::mapping<float>{}(dataset[i]) - centers(label, el_ix);
   });
 
@@ -558,11 +502,363 @@ void train_per_cluster(raft::device_resources const& handle,
 }
 
 /**
- * Compute the code: find the closest cluster in each pq_dim-subspace.
+ * A helper function: given the dataset in the rotated space
+ *  [n_rows, rot_dim] = [n_rows, pq_dim * pq_len],
+ * reinterpret the last dimension as two: [n_rows, pq_dim, pq_len]
+ *
+ * @tparam T
+ * @tparam IdxT
+ *
+ * @param vectors input data [n_rows, rot_dim]
+ * @param pq_centers codebook (used to infer the structure - pq_len)
+ * @return reinterpreted vectors [n_rows, pq_dim, pq_len]
+ */
+template <typename T, typename IdxT>
+static __device__ auto reinterpret_vectors(
+  device_matrix_view<T, IdxT, row_major> vectors,
+  device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers)
+  -> device_mdspan<T, extent_3d<IdxT>, row_major>
+{
+  const uint32_t pq_len = pq_centers.extent(1);
+  const uint32_t pq_dim = vectors.extent(1) / pq_len;
+  using layout_t        = typename decltype(vectors)::layout_type;
+  using accessor_t      = typename decltype(vectors)::accessor_type;
+  return mdspan<T, extent_3d<IdxT>, layout_t, accessor_t>(
+    vectors.data_handle(), extent_3d<IdxT>{vectors.extent(0), pq_dim, pq_len});
+}
+
+/**
+ * A consumer for the `run_on_list` and `run_on_vector` that just flattens PQ codes
+ * one-per-byte. That is, independent of the code width (pq_bits), one code uses
+ * the whole byte, hence one vector uses pq_dim bytes.
+ */
+struct unpack_codes {
+  device_matrix_view<uint8_t, uint32_t, row_major> out_codes;
+
+  /**
+   * Create a callable to be passed to `run_on_list`.
+   *
+   * @param[out] out_codes the destination for the read codes.
+   */
+  __device__ inline unpack_codes(device_matrix_view<uint8_t, uint32_t, row_major> out_codes)
+    : out_codes{out_codes}
+  {
+  }
+
+  /**  Write j-th component (code) of the i-th vector into the output array. */
+  __device__ inline void operator()(uint8_t code, uint32_t i, uint32_t j)
+  {
+    out_codes(i, j) = code;
+  }
+};
+
+template <uint32_t BlockSize, uint32_t PqBits>
+__launch_bounds__(BlockSize) __global__ void unpack_list_data_kernel(
+  device_matrix_view<uint8_t, uint32_t, row_major> out_codes,
+  device_mdspan<const uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> in_list_data,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  const uint32_t pq_dim = out_codes.extent(1);
+  auto unpack_action    = unpack_codes{out_codes};
+  run_on_list<PqBits>(in_list_data, offset_or_indices, out_codes.extent(0), pq_dim, unpack_action);
+}
+
+/**
+ * Unpack flat PQ codes from an existing list by the given offset.
+ *
+ * @param[out] codes flat PQ codes, one code per byte [n_rows, pq_dim]
+ * @param[in] list_data the packed ivf::list data.
+ * @param[in] offset_or_indices how many records in the list to skip or the exact indices.
+ * @param[in] pq_bits codebook size (1 << pq_bits)
+ * @param[in] stream
+ */
+inline void unpack_list_data(
+  device_matrix_view<uint8_t, uint32_t, row_major> codes,
+  device_mdspan<const uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices,
+  uint32_t pq_bits,
+  rmm::cuda_stream_view stream)
+{
+  auto n_rows = codes.extent(0);
+  if (n_rows == 0) { return; }
+
+  constexpr uint32_t kBlockSize = 256;
+  dim3 blocks(div_rounding_up_safe<uint32_t>(n_rows, kBlockSize), 1, 1);
+  dim3 threads(kBlockSize, 1, 1);
+  auto kernel = [pq_bits]() {
+    switch (pq_bits) {
+      case 4: return unpack_list_data_kernel<kBlockSize, 4>;
+      case 5: return unpack_list_data_kernel<kBlockSize, 5>;
+      case 6: return unpack_list_data_kernel<kBlockSize, 6>;
+      case 7: return unpack_list_data_kernel<kBlockSize, 7>;
+      case 8: return unpack_list_data_kernel<kBlockSize, 8>;
+      default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
+    }
+  }();
+  kernel<<<blocks, threads, 0, stream>>>(codes, list_data, offset_or_indices);
+  RAFT_CUDA_TRY(cudaPeekAtLastError());
+}
+
+/** Unpack the list data; see the public interface for the api and usage. */
+template <typename IdxT>
+void unpack_list_data(raft::device_resources const& res,
+                      const index<IdxT>& index,
+                      device_matrix_view<uint8_t, uint32_t, row_major> out_codes,
+                      uint32_t label,
+                      std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  unpack_list_data(out_codes,
+                   index.lists()[label]->data.view(),
+                   offset_or_indices,
+                   index.pq_bits(),
+                   res.get_stream());
+}
+
+/** A consumer for the `run_on_list` and `run_on_vector` that approximates the original input data.
+ */
+struct reconstruct_vectors {
+  codebook_gen codebook_kind;
+  uint32_t cluster_ix;
+  uint32_t pq_len;
+  device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers;
+  device_mdspan<const float, extent_3d<uint32_t>, row_major> centers_rot;
+  device_mdspan<float, extent_3d<uint32_t>, row_major> out_vectors;
+
+  /**
+   * Create a callable to be passed to `run_on_list`.
+   *
+   * @param[out] out_vectors the destination for the decoded vectors.
+   * @param[in] pq_centers the codebook
+   * @param[in] centers_rot
+   * @param[in] codebook_kind
+   * @param[in] cluster_ix label/id of the cluster.
+   */
+  __device__ inline reconstruct_vectors(
+    device_matrix_view<float, uint32_t, row_major> out_vectors,
+    device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers,
+    device_matrix_view<const float, uint32_t, row_major> centers_rot,
+    codebook_gen codebook_kind,
+    uint32_t cluster_ix)
+    : codebook_kind{codebook_kind},
+      cluster_ix{cluster_ix},
+      pq_len{pq_centers.extent(1)},
+      pq_centers{pq_centers},
+      centers_rot{reinterpret_vectors(centers_rot, pq_centers)},
+      out_vectors{reinterpret_vectors(out_vectors, pq_centers)}
+  {
+  }
+
+  /**
+   * Decode j-th component of the i-th vector by its code and write it into a chunk of the output
+   * vectors (pq_len elements).
+   */
+  __device__ inline void operator()(uint8_t code, uint32_t i, uint32_t j)
+  {
+    uint32_t partition_ix;
+    switch (codebook_kind) {
+      case codebook_gen::PER_CLUSTER: {
+        partition_ix = cluster_ix;
+      } break;
+      case codebook_gen::PER_SUBSPACE: {
+        partition_ix = j;
+      } break;
+      default: __builtin_unreachable();
+    }
+    for (uint32_t k = 0; k < pq_len; k++) {
+      out_vectors(i, j, k) = pq_centers(partition_ix, k, code) + centers_rot(cluster_ix, j, k);
+    }
+  }
+};
+
+template <uint32_t BlockSize, uint32_t PqBits>
+__launch_bounds__(BlockSize) __global__ void reconstruct_list_data_kernel(
+  device_matrix_view<float, uint32_t, row_major> out_vectors,
+  device_mdspan<const uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> in_list_data,
+  device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers,
+  device_matrix_view<const float, uint32_t, row_major> centers_rot,
+  codebook_gen codebook_kind,
+  uint32_t cluster_ix,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  const uint32_t pq_dim = out_vectors.extent(1) / pq_centers.extent(1);
+  auto reconstruct_action =
+    reconstruct_vectors{out_vectors, pq_centers, centers_rot, codebook_kind, cluster_ix};
+  run_on_list<PqBits>(
+    in_list_data, offset_or_indices, out_vectors.extent(0), pq_dim, reconstruct_action);
+}
+
+/** Decode the list data; see the public interface for the api and usage. */
+template <typename T, typename IdxT>
+void reconstruct_list_data(raft::device_resources const& res,
+                           const index<IdxT>& index,
+                           device_matrix_view<T, uint32_t, row_major> out_vectors,
+                           uint32_t label,
+                           std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  auto n_rows = out_vectors.extent(0);
+  if (n_rows == 0) { return; }
+  auto& list = index.lists()[label];
+  if (std::holds_alternative<uint32_t>(offset_or_indices)) {
+    auto n_skip = std::get<uint32_t>(offset_or_indices);
+    // sic! I'm using the upper bound `list.size` instead of exact `list_sizes(label)`
+    // to avoid an extra device-host data copy and the stream sync.
+    RAFT_EXPECTS(n_skip + n_rows <= list->size.load(),
+                 "offset + output size must be not bigger than the cluster size.");
+  }
+
+  auto tmp = make_device_mdarray<float>(
+    res, res.get_workspace_resource(), make_extents<uint32_t>(n_rows, index.rot_dim()));
+
+  constexpr uint32_t kBlockSize = 256;
+  dim3 blocks(div_rounding_up_safe<uint32_t>(n_rows, kBlockSize), 1, 1);
+  dim3 threads(kBlockSize, 1, 1);
+  auto kernel = [](uint32_t pq_bits) {
+    switch (pq_bits) {
+      case 4: return reconstruct_list_data_kernel<kBlockSize, 4>;
+      case 5: return reconstruct_list_data_kernel<kBlockSize, 5>;
+      case 6: return reconstruct_list_data_kernel<kBlockSize, 6>;
+      case 7: return reconstruct_list_data_kernel<kBlockSize, 7>;
+      case 8: return reconstruct_list_data_kernel<kBlockSize, 8>;
+      default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
+    }
+  }(index.pq_bits());
+  kernel<<<blocks, threads, 0, res.get_stream()>>>(tmp.view(),
+                                                   list->data.view(),
+                                                   index.pq_centers(),
+                                                   index.centers_rot(),
+                                                   index.codebook_kind(),
+                                                   label,
+                                                   offset_or_indices);
+  RAFT_CUDA_TRY(cudaPeekAtLastError());
+
+  float* out_float_ptr = nullptr;
+  rmm::device_uvector<float> out_float_buf(0, res.get_stream(), res.get_workspace_resource());
+  if constexpr (std::is_same_v<T, float>) {
+    out_float_ptr = out_vectors.data_handle();
+  } else {
+    out_float_buf.resize(size_t{n_rows} * size_t{index.dim()}, res.get_stream());
+    out_float_ptr = out_float_buf.data();
+  }
+  // Rotate the results back to the original space
+  float alpha = 1.0;
+  float beta  = 0.0;
+  linalg::gemm(res,
+               false,
+               false,
+               index.dim(),
+               n_rows,
+               index.rot_dim(),
+               &alpha,
+               index.rotation_matrix().data_handle(),
+               index.dim(),
+               tmp.data_handle(),
+               index.rot_dim(),
+               &beta,
+               out_float_ptr,
+               index.dim(),
+               res.get_stream());
+  // Transform the data to the original type, if necessary
+  if constexpr (!std::is_same_v<T, float>) {
+    linalg::map(res,
+                out_vectors,
+                utils::mapping<T>{},
+                make_device_matrix_view<const float>(out_float_ptr, n_rows, index.dim()));
+  }
+}
+
+/**
+ * A producer for the `write_list` and `write_vector` that reads the codes byte-by-byte. That is,
+ * independent of the code width (pq_bits), one code uses the whole byte, hence one vector uses
+ * pq_dim bytes.
+ */
+struct pass_codes {
+  device_matrix_view<const uint8_t, uint32_t, row_major> codes;
+
+  /**
+   * Create a callable to be passed to `write_list`.
+   *
+   * @param[in] codes the source codes.
+   */
+  __device__ inline pass_codes(device_matrix_view<const uint8_t, uint32_t, row_major> codes)
+    : codes{codes}
+  {
+  }
+
+  /** Read j-th component (code) of the i-th vector from the source. */
+  __device__ inline auto operator()(uint32_t i, uint32_t j) const -> uint8_t { return codes(i, j); }
+};
+
+template <uint32_t BlockSize, uint32_t PqBits>
+__launch_bounds__(BlockSize) __global__ void pack_list_data_kernel(
+  device_mdspan<uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data,
+  device_matrix_view<const uint8_t, uint32_t, row_major> codes,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  write_list<PqBits, 1>(
+    list_data, offset_or_indices, codes.extent(0), codes.extent(1), pass_codes{codes});
+}
+
+/**
+ * Write flat PQ codes into an existing list by the given offset.
+ *
+ * NB: no memory allocation happens here; the list must fit the data (offset + n_rows).
+ *
+ * @param[out] list_data the packed ivf::list data.
+ * @param[in] codes flat PQ codes, one code per byte [n_rows, pq_dim]
+ * @param[in] offset_or_indices how many records in the list to skip or the exact indices.
+ * @param[in] pq_bits codebook size (1 << pq_bits)
+ * @param[in] stream
+ */
+inline void pack_list_data(
+  device_mdspan<uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data,
+  device_matrix_view<const uint8_t, uint32_t, row_major> codes,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices,
+  uint32_t pq_bits,
+  rmm::cuda_stream_view stream)
+{
+  auto n_rows = codes.extent(0);
+  if (n_rows == 0) { return; }
+
+  constexpr uint32_t kBlockSize = 256;
+  dim3 blocks(div_rounding_up_safe<uint32_t>(n_rows, kBlockSize), 1, 1);
+  dim3 threads(kBlockSize, 1, 1);
+  auto kernel = [pq_bits]() {
+    switch (pq_bits) {
+      case 4: return pack_list_data_kernel<kBlockSize, 4>;
+      case 5: return pack_list_data_kernel<kBlockSize, 5>;
+      case 6: return pack_list_data_kernel<kBlockSize, 6>;
+      case 7: return pack_list_data_kernel<kBlockSize, 7>;
+      case 8: return pack_list_data_kernel<kBlockSize, 8>;
+      default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
+    }
+  }();
+  kernel<<<blocks, threads, 0, stream>>>(list_data, codes, offset_or_indices);
+  RAFT_CUDA_TRY(cudaPeekAtLastError());
+}
+
+template <typename IdxT>
+void pack_list_data(raft::device_resources const& res,
+                    index<IdxT>* index,
+                    device_matrix_view<const uint8_t, uint32_t, row_major> new_codes,
+                    uint32_t label,
+                    std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  pack_list_data(index->lists()[label]->data.view(),
+                 new_codes,
+                 offset_or_indices,
+                 index->pq_bits(),
+                 res.get_stream());
+}
+
+/**
+ *
+ * A producer for the `write_list` and `write_vector` that encodes level-1 input vector residuals
+ * into lvl-2 PQ codes.
+ * Computing a PQ code means finding the closest cluster in a pq_dim-subspace.
  *
  * @tparam SubWarpSize
  *   how many threads work on a single vector;
- *   bouded by either WarpSize or pq_book_size.
+ *   bounded by either WarpSize or pq_book_size.
  *
  * @param pq_centers
  *   - codebook_gen::PER_SUBSPACE: [pq_dim , pq_len, pq_book_size]
@@ -574,56 +870,75 @@ void train_per_cluster(raft::device_resources const& handle,
  * @param j index along pq_dim "dimension"
  * @param cluster_ix is used for PER_CLUSTER codebooks.
  */
-template <uint32_t SubWarpSize>
-__device__ auto compute_pq_code(
-  device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers,
-  device_mdspan<const float, extent_2d<uint32_t>, row_major> new_vector,
-  codebook_gen codebook_kind,
-  uint32_t j,
-  uint32_t cluster_ix) -> uint8_t
-{
-  using subwarp_align = Pow2<SubWarpSize>;
-  uint32_t lane_id    = subwarp_align::mod(laneId());
-  uint32_t partition_ix;
-  switch (codebook_kind) {
-    case codebook_gen::PER_CLUSTER: {
-      partition_ix = cluster_ix;
-    } break;
-    case codebook_gen::PER_SUBSPACE: {
-      partition_ix = j;
-    } break;
-    default: __builtin_unreachable();
+/**
+ */
+template <uint32_t SubWarpSize, typename IdxT>
+struct encode_vectors {
+  codebook_gen codebook_kind;
+  uint32_t cluster_ix;
+  device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers;
+  device_mdspan<const float, extent_3d<IdxT>, row_major> in_vectors;
+
+  __device__ inline encode_vectors(
+    device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers,
+    device_matrix_view<const float, IdxT, row_major> in_vectors,
+    codebook_gen codebook_kind,
+    uint32_t cluster_ix)
+    : codebook_kind{codebook_kind},
+      cluster_ix{cluster_ix},
+      pq_centers{pq_centers},
+      in_vectors{reinterpret_vectors(in_vectors, pq_centers)}
+  {
   }
 
-  const uint32_t pq_book_size = pq_centers.extent(2);
-  const uint32_t pq_len       = pq_centers.extent(1);
-  float min_dist              = std::numeric_limits<float>::infinity();
-  uint8_t code                = 0;
-  // calculate the distance for each PQ cluster, find the minimum for each thread
-  for (uint32_t i = lane_id; i < pq_book_size; i += subwarp_align::Value) {
-    // NB: the L2 quantifiers on residuals are always trained on L2 metric.
-    float d = 0.0f;
-    for (uint32_t k = 0; k < pq_len; k++) {
-      auto t = new_vector(j, k) - pq_centers(partition_ix, k, i);
-      d += t * t;
+  /**
+   * Encode the j-th component (a chunk of pq_len elements) of the i-th input vector: return the
+   * index of the closest codebook center, i.e. its PQ code.
+   */
+  __device__ inline auto operator()(IdxT i, uint32_t j) -> uint8_t
+  {
+    uint32_t lane_id = Pow2<SubWarpSize>::mod(laneId());
+    uint32_t partition_ix;
+    switch (codebook_kind) {
+      case codebook_gen::PER_CLUSTER: {
+        partition_ix = cluster_ix;
+      } break;
+      case codebook_gen::PER_SUBSPACE: {
+        partition_ix = j;
+      } break;
+      default: __builtin_unreachable();
     }
-    if (d < min_dist) {
-      min_dist = d;
-      code     = uint8_t(i);
+
+    const uint32_t pq_book_size = pq_centers.extent(2);
+    const uint32_t pq_len       = pq_centers.extent(1);
+    float min_dist              = std::numeric_limits<float>::infinity();
+    uint8_t code                = 0;
+    // calculate the distance for each PQ cluster, find the minimum for each thread
+    for (uint32_t l = lane_id; l < pq_book_size; l += SubWarpSize) {
+      // NB: the L2 quantizers on residuals are always trained on L2 metric.
+      float d = 0.0f;
+      for (uint32_t k = 0; k < pq_len; k++) {
+        auto t = in_vectors(i, j, k) - pq_centers(partition_ix, k, l);
+        d += t * t;
+      }
+      if (d < min_dist) {
+        min_dist = d;
+        code     = uint8_t(l);
+      }
     }
-  }
-  // reduce among threads
+    // reduce among threads
 #pragma unroll
-  for (uint32_t stride = SubWarpSize >> 1; stride > 0; stride >>= 1) {
-    const auto other_dist = shfl_xor(min_dist, stride, SubWarpSize);
-    const auto other_code = shfl_xor(code, stride, SubWarpSize);
-    if (other_dist < min_dist) {
-      min_dist = other_dist;
-      code     = other_code;
+    for (uint32_t stride = SubWarpSize >> 1; stride > 0; stride >>= 1) {
+      const auto other_dist = shfl_xor(min_dist, stride, SubWarpSize);
+      const auto other_code = shfl_xor(code, stride, SubWarpSize);
+      if (other_dist < min_dist) {
+        min_dist = other_dist;
+        code     = other_code;
+      }
     }
+    return code;
   }
-  return code;
-}
+};
 
 template <uint32_t BlockSize, uint32_t PqBits, typename IdxT>
 __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel(
@@ -639,7 +954,7 @@ __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel(
   constexpr uint32_t kSubWarpSize = std::min<uint32_t>(WarpSize, 1u << PqBits);
   using subwarp_align             = Pow2<kSubWarpSize>;
   const uint32_t lane_id          = subwarp_align::mod(threadIdx.x);
-  const IdxT row_ix = subwarp_align::div(IdxT{threadIdx.x} + IdxT{blockDim.x} * IdxT{blockIdx.x});
+  const IdxT row_ix = subwarp_align::div(IdxT{threadIdx.x} + IdxT{BlockSize} * IdxT{blockIdx.x});
   if (row_ix >= new_vectors.extent(0)) { return; }
 
   const uint32_t cluster_ix = new_labels[row_ix];
@@ -647,7 +962,7 @@ __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel(
   if (lane_id == 0) { out_ix = atomicAdd(&list_sizes(cluster_ix), 1); }
   out_ix = shfl(out_ix, 0, kSubWarpSize);
 
-  // write the label
+  // write the label (one record per subwarp)
   auto pq_indices = inds_ptrs(cluster_ix);
   if (lane_id == 0) {
     if (std::holds_alternative<IdxT>(src_offset_or_indices)) {
@@ -657,40 +972,81 @@ __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel(
     }
   }
 
-  // write the codes
-  using group_align         = Pow2<kIndexGroupSize>;
-  const uint32_t group_ix   = group_align::div(out_ix);
-  const uint32_t ingroup_ix = group_align::mod(out_ix);
-  const uint32_t pq_len     = pq_centers.extent(1);
-  const uint32_t pq_dim     = new_vectors.extent(1) / pq_len;
-
+  // write the codes (one record per subwarp):
+  const uint32_t pq_dim = new_vectors.extent(1) / pq_centers.extent(1);
   auto pq_extents = list_spec<uint32_t, IdxT>{PqBits, pq_dim, true}.make_list_extents(out_ix + 1);
-  auto pq_extents_vectorized =
-    make_extents<uint32_t>(pq_extents.extent(0), pq_extents.extent(1), pq_extents.extent(2));
-  auto pq_dataset = make_mdspan<pq_vec_t, uint32_t, row_major, false, true>(
-    reinterpret_cast<pq_vec_t*>(data_ptrs[cluster_ix]), pq_extents_vectorized);
-
-  __shared__ pq_vec_t codes[subwarp_align::div(BlockSize)];
-  pq_vec_t& code = codes[subwarp_align::div(threadIdx.x)];
-  bitfield_view_t<PqBits> out{reinterpret_cast<uint8_t*>(&code)};
-  constexpr uint32_t kChunkSize = (sizeof(pq_vec_t) * 8u) / PqBits;
-  for (uint32_t j = 0, i = 0; j < pq_dim; i++) {
-    // clear the chunk for writing
-    if (lane_id == 0) { code = pq_vec_t{}; }
-    // fill-in the values, one/pq_dim at a time
-#pragma unroll
-    for (uint32_t k = 0; k < kChunkSize && j < pq_dim; k++, j++) {
-      // find the label
-      using layout_t   = typename decltype(new_vectors)::layout_type;
-      using accessor_t = typename decltype(new_vectors)::accessor_type;
-      auto one_vector  = mdspan<const float, extent_2d<uint32_t>, layout_t, accessor_t>(
-        &new_vectors(row_ix, 0), extent_2d<uint32_t>{pq_dim, pq_len});
-      auto l = compute_pq_code<kSubWarpSize>(pq_centers, one_vector, codebook_kind, j, cluster_ix);
-      if (lane_id == 0) { out[k] = l; }
+  auto pq_dataset =
+    make_mdspan<uint8_t, uint32_t, row_major, false, true>(data_ptrs[cluster_ix], pq_extents);
+  write_vector<PqBits, kSubWarpSize>(
+    pq_dataset,
+    out_ix,
+    row_ix,
+    pq_dim,
+    encode_vectors<kSubWarpSize, IdxT>{pq_centers, new_vectors, codebook_kind, cluster_ix});
+}
+
+template <uint32_t BlockSize, uint32_t PqBits>
+__launch_bounds__(BlockSize) __global__ void encode_list_data_kernel(
+  device_mdspan<uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data,
+  device_matrix_view<const float, uint32_t, row_major> new_vectors,
+  device_mdspan<const float, extent_3d<uint32_t>, row_major> pq_centers,
+  codebook_gen codebook_kind,
+  uint32_t cluster_ix,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  constexpr uint32_t kSubWarpSize = std::min<uint32_t>(WarpSize, 1u << PqBits);
+  const uint32_t pq_dim           = new_vectors.extent(1) / pq_centers.extent(1);
+  auto encode_action =
+    encode_vectors<kSubWarpSize, uint32_t>{pq_centers, new_vectors, codebook_kind, cluster_ix};
+  write_list<PqBits, kSubWarpSize>(
+    list_data, offset_or_indices, new_vectors.extent(0), pq_dim, encode_action);
+}
+
+template <typename T, typename IdxT>
+void encode_list_data(raft::device_resources const& res,
+                      index<IdxT>* index,
+                      device_matrix_view<const T, uint32_t, row_major> new_vectors,
+                      uint32_t label,
+                      std::variant<uint32_t, const uint32_t*> offset_or_indices)
+{
+  auto n_rows = new_vectors.extent(0);
+  if (n_rows == 0) { return; }
+
+  auto mr = res.get_workspace_resource();
+
+  auto new_vectors_residual =
+    make_device_mdarray<float>(res, mr, make_extents<uint32_t>(n_rows, index->rot_dim()));
+
+  flat_compute_residuals<T, uint32_t>(res,
+                                      new_vectors_residual.data_handle(),
+                                      n_rows,
+                                      index->rotation_matrix(),
+                                      index->centers(),
+                                      new_vectors.data_handle(),
+                                      label,
+                                      mr);
+
+  constexpr uint32_t kBlockSize  = 256;
+  const uint32_t threads_per_vec = std::min<uint32_t>(WarpSize, index->pq_book_size());
+  dim3 blocks(div_rounding_up_safe<uint32_t>(n_rows, kBlockSize / threads_per_vec), 1, 1);
+  dim3 threads(kBlockSize, 1, 1);
+  auto kernel = [](uint32_t pq_bits) {
+    switch (pq_bits) {
+      case 4: return encode_list_data_kernel<kBlockSize, 4>;
+      case 5: return encode_list_data_kernel<kBlockSize, 5>;
+      case 6: return encode_list_data_kernel<kBlockSize, 6>;
+      case 7: return encode_list_data_kernel<kBlockSize, 7>;
+      case 8: return encode_list_data_kernel<kBlockSize, 8>;
+      default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
     }
-    // write the chunk into the dataset
-    if (lane_id == 0) { pq_dataset(group_ix, i, ingroup_ix) = code; }
-  }
+  }(index->pq_bits());
+  kernel<<<blocks, threads, 0, res.get_stream()>>>(index->lists()[label]->data.view(),
+                                                   new_vectors_residual.view(),
+                                                   index->pq_centers(),
+                                                   index->codebook_kind(),
+                                                   label,
+                                                   offset_or_indices);
+  RAFT_CUDA_TRY(cudaPeekAtLastError());
 }
 
 /**
@@ -732,14 +1088,14 @@ void process_and_fill_codes(raft::device_resources const& handle,
   auto new_vectors_residual =
     make_device_mdarray<float>(handle, mr, make_extents<IdxT>(n_rows, index.rot_dim()));
 
-  flat_compute_residuals(handle,
-                         new_vectors_residual.data_handle(),
-                         n_rows,
-                         index.rotation_matrix(),
-                         index.centers(),
-                         new_vectors,
-                         new_labels,
-                         mr);
+  flat_compute_residuals<T, IdxT>(handle,
+                                  new_vectors_residual.data_handle(),
+                                  n_rows,
+                                  index.rotation_matrix(),
+                                  index.centers(),
+                                  new_vectors,
+                                  new_labels,
+                                  mr);
 
   constexpr uint32_t kBlockSize  = 256;
   const uint32_t threads_per_vec = std::min<uint32_t>(WarpSize, index.pq_book_size());
@@ -819,6 +1175,85 @@ void recompute_internal_state(const raft::device_resources& res, index<IdxT>& in
   }
 }
 
+/**
+ * Helper function: allocate enough space in the list, compute the offset, at which to start
+ * writing, and fill-in indices.
+ *
+ * @return offset for writing the data
+ */
+template <typename IdxT>
+auto extend_list_prepare(raft::device_resources const& res,
+                         index<IdxT>* index,
+                         device_vector_view<const IdxT, uint32_t, row_major> new_indices,
+                         uint32_t label) -> uint32_t
+{
+  uint32_t n_rows = new_indices.extent(0);
+  uint32_t offset;
+  // Allocate the lists to fit the new data
+  copy(&offset, index->list_sizes().data_handle() + label, 1, res.get_stream());
+  res.sync_stream();
+  uint32_t new_size = offset + n_rows;
+  copy(index->list_sizes().data_handle() + label, &new_size, 1, res.get_stream());
+  auto spec = list_spec<uint32_t, IdxT>{
+    index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()};
+  auto& list = index->lists()[label];
+  ivf::resize_list(res, list, spec, new_size, offset);
+  copy(list->indices.data_handle() + offset, new_indices.data_handle(), n_rows, res.get_stream());
+  return offset;
+}
+
+/**
+ * Extend one list of the index in-place, by the list label, skipping the classification and
+ * encoding steps.
+ * See the public interface for the api and usage.
+ */
+template <typename IdxT>
+void extend_list_with_codes(raft::device_resources const& res,
+                            index<IdxT>* index,
+                            device_matrix_view<const uint8_t, uint32_t, row_major> new_codes,
+                            device_vector_view<const IdxT, uint32_t, row_major> new_indices,
+                            uint32_t label)
+{
+  // Allocate memory and write indices
+  auto offset = extend_list_prepare(res, index, new_indices, label);
+  // Pack the data
+  pack_list_data<IdxT>(res, index, new_codes, label, offset);
+  // Update the pointers and the sizes
+  recompute_internal_state(res, *index);
+}
+
+/**
+ * Extend one list of the index in-place, by the list label, skipping the classification step.
+ * See the public interface for the api and usage.
+ */
+template <typename T, typename IdxT>
+void extend_list(raft::device_resources const& res,
+                 index<IdxT>* index,
+                 device_matrix_view<const T, uint32_t, row_major> new_vectors,
+                 device_vector_view<const IdxT, uint32_t, row_major> new_indices,
+                 uint32_t label)
+{
+  // Allocate memory and write indices
+  auto offset = extend_list_prepare(res, index, new_indices, label);
+  // Encode the data
+  encode_list_data<T, IdxT>(res, index, new_vectors, label, offset);
+  // Update the pointers and the sizes
+  recompute_internal_state(res, *index);
+}
+
+/**
+ * Remove all data from a single list.
+ * See the public interface for the api and usage.
+ */
+template <typename IdxT>
+void erase_list(raft::device_resources const& res, index<IdxT>* index, uint32_t label)
+{
+  uint32_t zero = 0;
+  copy(index->list_sizes().data_handle() + label, &zero, 1, res.get_stream());
+  index->lists()[label].reset();
+  recompute_internal_state(res, *index);
+}
+
 /** Copy the state of an index into a new index, but share the list data among the two. */
 template <typename IdxT>
 auto clone(const raft::device_resources& res, const index<IdxT>& source) -> index<IdxT>
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh
new file mode 100644
index 0000000000..52969dd176
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/neighbors/ivf_list.hpp>
+#include <raft/neighbors/ivf_pq_types.hpp>
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/util/cuda_utils.cuh>
+#include <raft/util/device_atomics.cuh>
+#include <raft/util/integer_utils.hpp>
+#include <raft/util/pow2_utils.cuh>
+#include <raft/util/vectorized.cuh>
+
+#include <variant>
+
+namespace raft::neighbors::ivf_pq::detail {
+
+/** A chunk of PQ-encoded vector managed by one CUDA thread. */
+using pq_vec_t = TxN_t<uint8_t, kIndexGroupVecLen>::io_t;
+
+/**
+ * This type mimics the `uint8_t&` for the indexing operator of `bitfield_view_t`.
+ *
+ * @tparam Bits number of bits comprising the value.
+ */
+template <uint32_t Bits>
+struct bitfield_ref_t {
+  static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte");
+  constexpr static uint8_t kMask = static_cast<uint8_t>((1u << Bits) - 1u);
+  uint8_t* ptr;
+  uint32_t offset;
+
+  constexpr operator uint8_t()  // NOLINT
+  {
+    auto pair = static_cast<uint16_t>(ptr[0]);
+    if (offset + Bits > 8) { pair |= static_cast<uint16_t>(ptr[1]) << 8; }
+    return static_cast<uint8_t>((pair >> offset) & kMask);
+  }
+
+  constexpr auto operator=(uint8_t code) -> bitfield_ref_t&
+  {
+    if (offset + Bits > 8) {
+      auto pair = static_cast<uint16_t>(ptr[0]);
+      pair |= static_cast<uint16_t>(ptr[1]) << 8;
+      pair &= ~(static_cast<uint16_t>(kMask) << offset);
+      pair |= static_cast<uint16_t>(code) << offset;
+      ptr[0] = static_cast<uint8_t>(Pow2<256>::mod(pair));
+      ptr[1] = static_cast<uint8_t>(Pow2<256>::div(pair));
+    } else {
+      ptr[0] = (ptr[0] & ~(kMask << offset)) | (code << offset);
+    }
+    return *this;
+  }
+};
+
+/**
+ * View a byte array as an array of unsigned integers of custom small bit size.
+ *
+ * @tparam Bits number of bits comprising a single element of the array.
+ */
+template <uint32_t Bits>
+struct bitfield_view_t {
+  static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte");
+  uint8_t* raw;
+
+  constexpr auto operator[](uint32_t i) -> bitfield_ref_t<Bits>
+  {
+    uint32_t bit_offset = i * Bits;
+    return bitfield_ref_t<Bits>{raw + Pow2<8>::div(bit_offset), Pow2<8>::mod(bit_offset)};
+  }
+};
+
+/**
+ * Process a single vector in a list.
+ *
+ * @tparam PqBits
+ * @tparam Action tells how to process a single vector (e.g. reconstruct or just unpack)
+ *
+ * @param[in] in_list_data the encoded cluster data.
+ * @param[in] in_ix in-cluster index of the vector to be decoded (one-per-thread).
+ * @param[in] out_ix the output index passed to the action
+ * @param[in] pq_dim
+ * @param action a callable action to be invoked on each PQ code (component of the encoding)
+ *    type: void (uint8_t code, uint32_t out_ix, uint32_t j), where j = [0..pq_dim).
+ */
+template <uint32_t PqBits, typename Action>
+__device__ void run_on_vector(
+  device_mdspan<const uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> in_list_data,
+  uint32_t in_ix,
+  uint32_t out_ix,
+  uint32_t pq_dim,
+  Action action)
+{
+  using group_align         = Pow2<kIndexGroupSize>;
+  const uint32_t group_ix   = group_align::div(in_ix);
+  const uint32_t ingroup_ix = group_align::mod(in_ix);
+
+  pq_vec_t code_chunk;
+  bitfield_view_t<PqBits> code_view{reinterpret_cast<uint8_t*>(&code_chunk)};
+  constexpr uint32_t kChunkSize = (sizeof(pq_vec_t) * 8u) / PqBits;
+  for (uint32_t j = 0, i = 0; j < pq_dim; i++) {
+    // read the chunk
+    code_chunk = *reinterpret_cast<const pq_vec_t*>(&in_list_data(group_ix, i, ingroup_ix, 0));
+    // read the codes, one/pq_dim at a time
+#pragma unroll
+    for (uint32_t k = 0; k < kChunkSize && j < pq_dim; k++, j++) {
+      // read a piece of the reconstructed vector
+      action(code_view[k], out_ix, j);
+    }
+  }
+}
+
+/**
+ * Process a single vector in a list.
+ *
+ * @tparam PqBits
+ * @tparam SubWarpSize how many threads work on the same ix (only the first thread writes data).
+ * @tparam IdxT type of the index passed to the action
+ * @tparam Action tells how to process a single vector (e.g. encode or just pack)
+ *
+ * @param[in] out_list_data the encoded cluster data.
+ * @param[in] out_ix in-cluster index of the vector to be processed (one-per-SubWarpSize threads).
+ * @param[in] in_ix the input index passed to the action (one-per-SubWarpSize threads).
+ * @param[in] pq_dim
+ * @param action a callable action to be invoked on each PQ code (component of the encoding)
+ *    type: (uint32_t in_ix, uint32_t j) -> uint8_t, where j = [0..pq_dim).
+ */
+template <uint32_t PqBits, uint32_t SubWarpSize, typename IdxT, typename Action>
+__device__ void write_vector(
+  device_mdspan<uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> out_list_data,
+  uint32_t out_ix,
+  IdxT in_ix,
+  uint32_t pq_dim,
+  Action action)
+{
+  const uint32_t lane_id = Pow2<SubWarpSize>::mod(threadIdx.x);
+
+  using group_align         = Pow2<kIndexGroupSize>;
+  const uint32_t group_ix   = group_align::div(out_ix);
+  const uint32_t ingroup_ix = group_align::mod(out_ix);
+
+  pq_vec_t code_chunk;
+  bitfield_view_t<PqBits> code_view{reinterpret_cast<uint8_t*>(&code_chunk)};
+  constexpr uint32_t kChunkSize = (sizeof(pq_vec_t) * 8u) / PqBits;
+  for (uint32_t j = 0, i = 0; j < pq_dim; i++) {
+    // clear the chunk
+    if (lane_id == 0) { code_chunk = pq_vec_t{}; }
+    // write the codes, one/pq_dim at a time
+#pragma unroll
+    for (uint32_t k = 0; k < kChunkSize && j < pq_dim; k++, j++) {
+      // write a single code
+      uint8_t code = action(in_ix, j);
+      if (lane_id == 0) { code_view[k] = code; }
+    }
+    // write the chunk to the list
+    if (lane_id == 0) {
+      *reinterpret_cast<pq_vec_t*>(&out_list_data(group_ix, i, ingroup_ix, 0)) = code_chunk;
+    }
+  }
+}
+
+/** Process a block (or the given indices) of one list; one thread per record, grid-stride loop. */
+template <uint32_t PqBits, typename Action>
+__device__ void run_on_list(
+  device_mdspan<const uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> in_list_data,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices,
+  uint32_t len,
+  uint32_t pq_dim,
+  Action action)
+{
+  for (uint32_t i = threadIdx.x + blockDim.x * blockIdx.x; i < len; i += blockDim.x * gridDim.x) {
+    const uint32_t src_ix = std::holds_alternative<uint32_t>(offset_or_indices)
+                              ? std::get<uint32_t>(offset_or_indices) + i  // contiguous block
+                              : std::get<const uint32_t*>(offset_or_indices)[i];  // explicit indices
+    run_on_vector<PqBits>(in_list_data, src_ix, i, pq_dim, action);
+  }
+}
+
+/** Write a block (or the given indices) of one list; SubWarpSize threads cooperate per record. */
+template <uint32_t PqBits, uint32_t SubWarpSize, typename Action>
+__device__ void write_list(
+  device_mdspan<uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> out_list_data,
+  std::variant<uint32_t, const uint32_t*> offset_or_indices,
+  uint32_t len,
+  uint32_t pq_dim,
+  Action action)
+{
+  using subwarp_align = Pow2<SubWarpSize>;
+  uint32_t stride     = subwarp_align::div(blockDim.x * gridDim.x);  // records per grid pass
+  uint32_t ix         = subwarp_align::div(threadIdx.x + blockDim.x * blockIdx.x);
+  for (; ix < len; ix += stride) {  // grid-wide stride: no record is re-encoded by another block
+    const uint32_t dst_ix = std::holds_alternative<uint32_t>(offset_or_indices)
+                              ? std::get<uint32_t>(offset_or_indices) + ix  // contiguous block
+                              : std::get<const uint32_t*>(offset_or_indices)[ix];  // explicit slots
+    write_vector<PqBits, SubWarpSize>(out_list_data, dst_ix, ix, pq_dim, action);
+  }
+}
+
+}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/include/raft/neighbors/ivf_pq.cuh b/cpp/include/raft/neighbors/ivf_pq.cuh
index 934643e0af..dfc24e8214 100644
--- a/cpp/include/raft/neighbors/ivf_pq.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq.cuh
@@ -234,7 +234,7 @@ auto build(raft::device_resources const& handle,
  * @brief Build a new index containing the data of the original plus new extra vectors.
  *
  * Implementation note:
- *    The new data is clustered according to existing kmeans clusters, then the cluster
+ *    The new data is clustered according to existing kmeans clusters; the cluster
  *    centers are unchanged.
  *
  * Usage example:
diff --git a/cpp/include/raft/neighbors/ivf_pq_helpers.cuh b/cpp/include/raft/neighbors/ivf_pq_helpers.cuh
new file mode 100644
index 0000000000..398bd545f1
--- /dev/null
+++ b/cpp/include/raft/neighbors/ivf_pq_helpers.cuh
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/neighbors/detail/ivf_pq_build.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
+
+namespace raft::neighbors::ivf_pq::helpers {
+/**
+ * @defgroup ivf_pq_helpers Helper functions for manipulating IVF PQ Index
+ * @{
+ */
+
+namespace codepacker {
+/**
+ * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index
+ * starting at given `offset`.
+ *
+ * Bit compression is removed, which means output will have pq_dim dimensional vectors (one code per
+ * byte, instead of ceildiv(pq_dim * pq_bits, 8) bytes of pq codes).
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   auto list_data = index.lists()[label]->data.view();
+ *   // allocate the buffer for the output
+ *   uint32_t n_take = 4;
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, n_take, index.pq_dim());
+ *   uint32_t offset = 0;
+ *   // unpack n_take elements from the list
+ *   ivf_pq::helpers::codepacker::unpack(res, list_data, index.pq_bits(), offset, codes.view());
+ * @endcode
+ *
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] res raft resource
+ * @param[in] list_data block to read from
+ * @param[in] pq_bits bit length of encoded vector elements
+ * @param[in] offset
+ *   How many records in the list to skip.
+ * @param[out] codes
+ *   the destination buffer [n_take, index.pq_dim()].
+ *   The length `n_take` defines how many records to unpack,
+ *   it must be smaller than the list size.
+ */
+inline void unpack(
+  raft::device_resources const& res,
+  device_mdspan<const uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data,
+  uint32_t pq_bits,
+  uint32_t offset,
+  device_matrix_view<uint8_t, uint32_t, row_major> codes)
+{
+  ivf_pq::detail::unpack_list_data(codes, list_data, offset, pq_bits, res.get_stream());
+}
+
+/**
+ * Write flat PQ codes into an existing list by the given offset.
+ *
+ * NB: no memory allocation happens here; the list must fit the data (offset + n_vec).
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   auto list_data  = index.lists()[label]->data.view();
+ *   // allocate the buffer for the input codes
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, n_vec, index.pq_dim());
+ *   ... prepare n_vecs to pack into the list in codes ...
+ *   // write codes into the list starting from the 42nd position
+ *   ivf_pq::helpers::codepacker::pack(
+ *       res, make_const_mdspan(codes.view()), index.pq_bits(), 42, list_data);
+ * @endcode
+ *
+ * @param[in] res
+ * @param[in] codes flat PQ codes, one code per byte [n_vec, pq_dim]
+ * @param[in] pq_bits bit length of encoded vector elements
+ * @param[in] offset how many records to skip before writing the data into the list
+ * @param[in] list_data block to write into
+ */
+inline void pack(
+  raft::device_resources const& res,
+  device_matrix_view<const uint8_t, uint32_t, row_major> codes,
+  uint32_t pq_bits,
+  uint32_t offset,
+  device_mdspan<uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data)
+{
+  ivf_pq::detail::pack_list_data(list_data, codes, offset, pq_bits, res.get_stream());
+}
+}  // namespace codepacker
+
+/**
+ * Write flat PQ codes into an existing list by the given offset.
+ *
+ * The list is identified by its label.
+ *
+ * NB: no memory allocation happens here; the list must fit the data (offset + n_vec).
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // We will write into the 137th cluster
+ *   uint32_t label = 137;
+ *   // allocate the buffer for the input codes
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, n_vec, index.pq_dim());
+ *   ... prepare n_vecs to pack into the list in codes ...
+ *   // write codes into the list starting from the 42nd position
+ *   ivf_pq::helpers::pack_list_data(res, &index, make_const_mdspan(codes.view()), label, 42);
+ * @endcode
+ *
+ * @param[in] res
+ * @param[inout] index IVF-PQ index.
+ * @param[in] codes flat PQ codes, one code per byte [n_rows, pq_dim]
+ * @param[in] label The id of the list (cluster) into which we write.
+ * @param[in] offset how many records to skip before writing the data into the list
+ */
+template <typename IdxT>
+void pack_list_data(raft::device_resources const& res,
+                    index<IdxT>* index,
+                    device_matrix_view<const uint8_t, uint32_t, row_major> codes,
+                    uint32_t label,
+                    uint32_t offset)
+{
+  ivf_pq::detail::pack_list_data(res, index, codes, label, offset);
+}
+
+/**
+ * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index
+ * starting at given `offset`, one code per byte (independently of pq_bits).
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // We will unpack the fourth cluster
+ *   uint32_t label = 3;
+ *   // Get the list size
+ *   uint32_t list_size = 0;
+ *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, res.get_stream());
+ *   res.sync_stream();
+ *   // allocate the buffer for the output
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, list_size, index.pq_dim());
+ *   // unpack the whole list
+ *   ivf_pq::helpers::unpack_list_data(res, index, codes.view(), label, 0);
+ * @endcode
+ *
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] res
+ * @param[in] index
+ * @param[out] out_codes
+ *   the destination buffer [n_take, index.pq_dim()].
+ *   The length `n_take` defines how many records to unpack,
+ *   it must not exceed the list size.
+ * @param[in] label
+ *   The id of the list (cluster) to decode.
+ * @param[in] offset
+ *   How many records in the list to skip.
+ */
+template <typename IdxT>
+void unpack_list_data(raft::device_resources const& res,
+                      const index<IdxT>& index,
+                      device_matrix_view<uint8_t, uint32_t, row_major> out_codes,
+                      uint32_t label,
+                      uint32_t offset)
+{
+  return ivf_pq::detail::unpack_list_data<IdxT>(res, index, out_codes, label, offset);
+}
+
+/**
+ * @brief Unpack a series of records of a single list (cluster) in the compressed index
+ * by their in-list offsets, one code per byte (independently of pq_bits).
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // We will unpack the fourth cluster
+ *   uint32_t label = 3;
+ *   // Create the selection vector
+ *   auto selected_indices = raft::make_device_vector<uint32_t>(res, 4);
+ *   ... fill the indices ...
+ *   res.sync_stream();
+ *   // allocate the buffer for the output
+ *   auto codes = raft::make_device_matrix<uint8_t>(res, selected_indices.size(), index.pq_dim());
+ *   // unpack the selected records
+ *   ivf_pq::helpers::unpack_list_data(
+ *       res, index, selected_indices.view(), codes.view(), label);
+ * @endcode
+ *
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] res
+ * @param[in] index
+ * @param[in] in_cluster_indices
+ *   The offsets of the selected indices within the cluster.
+ * @param[out] out_codes
+ *   the destination buffer [n_take, index.pq_dim()].
+ *   The length `n_take` defines how many records to unpack,
+ *   it must be smaller than the list size.
+ * @param[in] label
+ *   The id of the list (cluster) to decode.
+ */
+template <typename IdxT>
+void unpack_list_data(raft::device_resources const& res,
+                      const index<IdxT>& index,
+                      device_vector_view<const uint32_t> in_cluster_indices,
+                      device_matrix_view<uint8_t, uint32_t, row_major> out_codes,
+                      uint32_t label)
+{
+  return ivf_pq::detail::unpack_list_data<IdxT>(res, index, out_codes, label, in_cluster_indices);
+}
+
+/**
+ * @brief Decode `n_take` consecutive records of a single list (cluster) in the compressed index
+ * starting at given `offset`.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // We will reconstruct the fourth cluster
+ *   uint32_t label = 3;
+ *   // Get the list size
+ *   uint32_t list_size = 0;
+ *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, res.get_stream());
+ *   res.sync_stream();
+ *   // allocate the buffer for the output
+ *   auto decoded_vectors = raft::make_device_matrix<float>(res, list_size, index.dim());
+ *   // decode the whole list
+ *   ivf_pq::helpers::reconstruct_list_data(res, index, decoded_vectors.view(), label, 0);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] res
+ * @param[in] index
+ * @param[out] out_vectors
+ *   the destination buffer [n_take, index.dim()].
+ *   The length `n_take` defines how many records to reconstruct,
+ *   it must not exceed the list size.
+ * @param[in] label
+ *   The id of the list (cluster) to decode.
+ * @param[in] offset
+ *   How many records in the list to skip.
+ */
+template <typename T, typename IdxT>
+void reconstruct_list_data(raft::device_resources const& res,
+                           const index<IdxT>& index,
+                           device_matrix_view<T, uint32_t, row_major> out_vectors,
+                           uint32_t label,
+                           uint32_t offset)
+{
+  return ivf_pq::detail::reconstruct_list_data(res, index, out_vectors, label, offset);
+}
+
+/**
+ * @brief Decode a series of records of a single list (cluster) in the compressed index
+ * by their in-list offsets.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // We will reconstruct the fourth cluster
+ *   uint32_t label = 3;
+ *   // Create the selection vector
+ *   auto selected_indices = raft::make_device_vector<uint32_t>(res, 4);
+ *   ... fill the indices ...
+ *   res.sync_stream();
+ *   // allocate the buffer for the output
+ *   auto decoded_vectors = raft::make_device_matrix<float>(
+ *                             res, selected_indices.size(), index.dim());
+ *   // decode the selected records
+ *   ivf_pq::helpers::reconstruct_list_data(
+ *       res, index, selected_indices.view(), decoded_vectors.view(), label);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] res
+ * @param[in] index
+ * @param[in] in_cluster_indices
+ *   The offsets of the selected indices within the cluster.
+ * @param[out] out_vectors
+ *   the destination buffer [n_take, index.dim()].
+ *   The length `n_take` defines how many records to reconstruct,
+ *   it must be smaller than the list size.
+ * @param[in] label
+ *   The id of the list (cluster) to decode.
+ */
+template <typename T, typename IdxT>
+void reconstruct_list_data(raft::device_resources const& res,
+                           const index<IdxT>& index,
+                           device_vector_view<const uint32_t> in_cluster_indices,
+                           device_matrix_view<T, uint32_t, row_major> out_vectors,
+                           uint32_t label)
+{
+  return ivf_pq::detail::reconstruct_list_data(res, index, out_vectors, label, in_cluster_indices);
+}
+
+/**
+ * @brief Extend one list of the index in-place, by the list label, skipping the classification and
+ * encoding steps.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // We will extend the fourth cluster
+ *   uint32_t label = 3;
+ *   // We will fill 4 new vectors
+ *   uint32_t n_vec = 4;
+ *   // Indices of the new vectors
+ *   auto indices = raft::make_device_vector<uint32_t>(res, n_vec);
+ *   ... fill the indices ...
+ *   auto new_codes = raft::make_device_matrix<uint8_t, uint32_t, row_major>(
+ *       res, n_vec, index.pq_dim());
+ *   ... fill codes ...
+ *   // extend list with new codes
+ *   ivf_pq::helpers::extend_list_with_codes(
+ *       res, &index, new_codes.view(), indices.view(), label);
+ * @endcode
+ *
+ * @tparam IdxT
+ *
+ * @param[in] res
+ * @param[inout] index
+ * @param[in] new_codes flat PQ codes, one code per byte [n_rows, index.pq_dim()]
+ * @param[in] new_indices source indices [n_rows]
+ * @param[in] label the id of the target list (cluster).
+ */
+template <typename IdxT>
+void extend_list_with_codes(raft::device_resources const& res,
+                            index<IdxT>* index,
+                            device_matrix_view<const uint8_t, uint32_t, row_major> new_codes,
+                            device_vector_view<const IdxT, uint32_t, row_major> new_indices,
+                            uint32_t label)
+{
+  ivf_pq::detail::extend_list_with_codes(res, index, new_codes, new_indices, label);
+}
+
+/**
+ * @brief Extend one list of the index in-place, by the list label, skipping the classification
+ * step.
+ *
+ *  Usage example:
+ * @code{.cpp}
+ *   // We will extend the fourth cluster
+ *   uint32_t label = 3;
+ *   // We will extend with 4 new vectors
+ *   uint32_t n_vec = 4;
+ *   // Indices of the new vectors
+ *   auto indices = raft::make_device_vector<uint32_t>(res, n_vec);
+ *   ... fill the indices ...
+ *   auto new_vectors = raft::make_device_matrix<float, uint32_t, row_major>(
+ *       res, n_vec, index.dim());
+ *   ... fill vectors ...
+ *   // extend list with new vectors
+ *   ivf_pq::helpers::extend_list(
+ *       res, &index, new_vectors.view(), indices.view(), label);
+ * @endcode
+ *
+ * @tparam T
+ * @tparam IdxT
+ *
+ * @param[in] res
+ * @param[inout] index
+ * @param[in] new_vectors data to encode [n_rows, index.dim()]
+ * @param[in] new_indices source indices [n_rows]
+ * @param[in] label the id of the target list (cluster).
+ *
+ */
+template <typename T, typename IdxT>
+void extend_list(raft::device_resources const& res,
+                 index<IdxT>* index,
+                 device_matrix_view<const T, uint32_t, row_major> new_vectors,
+                 device_vector_view<const IdxT, uint32_t, row_major> new_indices,
+                 uint32_t label)
+{
+  ivf_pq::detail::extend_list(res, index, new_vectors, new_indices, label);
+}
+
+/**
+ * @brief Remove all data from a single list (cluster) in the index.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   // We will erase the fourth cluster (label = 3)
+ *   ivf_pq::helpers::erase_list(res, &index, 3);
+ * @endcode
+ *
+ * @tparam IdxT
+ * @param[in] res
+ * @param[inout] index
+ * @param[in] label the id of the target list (cluster).
+ */
+template <typename IdxT>
+void erase_list(raft::device_resources const& res, index<IdxT>* index, uint32_t label)
+{
+  ivf_pq::detail::erase_list(res, index, label);
+}
+
+/** @} */
+}  // namespace raft::neighbors::ivf_pq::helpers
diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh
index c69829821a..07efcb099e 100644
--- a/cpp/test/neighbors/ann_ivf_pq.cuh
+++ b/cpp/test/neighbors/ann_ivf_pq.cuh
@@ -22,7 +22,11 @@
 
 #include <raft/core/logger.hpp>
 #include <raft/distance/distance_types.hpp>
+#include <raft/linalg/map.cuh>
+#include <raft/linalg/map_reduce.cuh>
+#include <raft/matrix/gather.cuh>
 #include <raft/neighbors/ivf_pq.cuh>
+#include <raft/neighbors/ivf_pq_helpers.cuh>
 #include <raft/random/rng.cuh>
 #ifdef RAFT_COMPILED
 #include <raft/neighbors/specializations.cuh>
@@ -38,8 +42,6 @@
 #include <gtest/gtest.h>
 
 #include <cub/cub.cuh>
-#include <thrust/reduce.h>
-#include <thrust/sequence.h>
 
 #include <algorithm>
 #include <cstddef>
@@ -115,6 +117,33 @@ inline auto operator<<(std::ostream& os, const ivf_pq_inputs& p) -> std::ostream
   return os;
 }
 
+template <typename T>
+void compare_vectors_l2(
+  const raft::device_resources& res, T a, T b, uint32_t label, double compression_ratio, double eps)
+{
+  auto n_rows = a.extent(0);
+  auto dim    = a.extent(1);
+  rmm::mr::managed_memory_resource managed_memory;
+  auto dist = make_device_mdarray<double>(res, &managed_memory, make_extents<uint32_t>(n_rows));
+  linalg::map_offset(res, dist.view(), [a, b, dim] __device__(uint32_t i) {
+    spatial::knn::detail::utils::mapping<float> f{};
+    double d = 0.0f;
+    for (uint32_t j = 0; j < dim; j++) {
+      double t = f(a(i, j)) - f(b(i, j));
+      d += t * t;
+    }
+    return sqrt(d / double(dim));
+  });
+  res.sync_stream();
+  for (uint32_t i = 0; i < n_rows; i++) {
+    double d = dist(i);
+    // The theoretical estimate of the error is hard to come up with,
+    // the estimate below is based on experimentation + curse of dimensionality
+    ASSERT_LE(d, 1.2 * eps * std::pow(2.0, compression_ratio))
+      << " (label = " << label << ", ix = " << i << ", eps = " << eps << ")";
+  }
+}
+
 template <typename IdxT>
 auto min_output_size(const raft::device_resources& handle,
                      const ivf_pq::index<IdxT>& index,
@@ -139,7 +168,6 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
   {
   }
 
- protected:
   void gen_data()
   {
     database.resize(size_t{ps.num_db_vecs} * size_t{ps.dim}, stream_);
@@ -178,7 +206,7 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
     handle_.sync_stream(stream_);
   }
 
-  index<IdxT> build_only()
+  auto build_only()
   {
     auto ipams              = ps.index_params;
     ipams.add_data_on_build = true;
@@ -188,19 +216,17 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
     return ivf_pq::build<DataT, IdxT>(handle_, ipams, index_view);
   }
 
-  index<IdxT> build_2_extends()
+  auto build_2_extends()
   {
-    rmm::device_uvector<IdxT> db_indices(ps.num_db_vecs, stream_);
-    thrust::sequence(handle_.get_thrust_policy(),
-                     thrust::device_pointer_cast(db_indices.data()),
-                     thrust::device_pointer_cast(db_indices.data() + ps.num_db_vecs));
+    auto db_indices = make_device_vector<IdxT>(handle_, ps.num_db_vecs);
+    linalg::map_offset(handle_, db_indices.view(), identity_op{});
     handle_.sync_stream(stream_);
     auto size_1 = IdxT(ps.num_db_vecs) / 2;
     auto size_2 = IdxT(ps.num_db_vecs) - size_1;
     auto vecs_1 = database.data();
     auto vecs_2 = database.data() + size_t(size_1) * size_t(ps.dim);
-    auto inds_1 = db_indices.data();
-    auto inds_2 = db_indices.data() + size_t(size_1);
+    auto inds_1 = db_indices.data_handle();
+    auto inds_2 = db_indices.data_handle() + size_t(size_1);
 
     auto ipams              = ps.index_params;
     ipams.add_data_on_build = false;
@@ -220,17 +246,160 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
     return idx;
   }
 
-  index<IdxT> build_serialize()
+  auto build_serialize()
   {
     ivf_pq::serialize<IdxT>(handle_, "ivf_pq_index", build_only());
     return ivf_pq::deserialize<IdxT>(handle_, "ivf_pq_index");
   }
 
+  void check_reconstruction(const index<IdxT>& index,
+                            double compression_ratio,
+                            uint32_t label,
+                            uint32_t n_take,
+                            uint32_t n_skip)
+  {
+    auto& rec_list = index.lists()[label];
+    auto dim       = index.dim();
+    n_take         = std::min<uint32_t>(n_take, rec_list->size.load());
+    n_skip         = std::min<uint32_t>(n_skip, rec_list->size.load() - n_take);
+
+    if (n_take == 0) { return; }
+
+    auto rec_data  = make_device_matrix<DataT>(handle_, n_take, dim);
+    auto orig_data = make_device_matrix<DataT>(handle_, n_take, dim);
+
+    ivf_pq::helpers::reconstruct_list_data(handle_, index, rec_data.view(), label, n_skip);
+
+    matrix::gather(database.data(),
+                   IdxT{dim},
+                   IdxT{n_take},
+                   rec_list->indices.data_handle() + n_skip,
+                   IdxT{n_take},
+                   orig_data.data_handle(),
+                   stream_);
+
+    compare_vectors_l2(handle_, rec_data.view(), orig_data.view(), label, compression_ratio, 0.06);
+  }
+
+  void check_reconstruct_extend(index<IdxT>* index, double compression_ratio, uint32_t label)
+  {
+    // NB: this is a copy, not a reference, so the list is retained; the index will have to create
+    // a new list on the `erase_list` op.
+    auto old_list = index->lists()[label];
+    auto n_rows   = old_list->size.load();
+    if (n_rows == 0) { return; }
+
+    auto vectors_1 = make_device_matrix<EvalT>(handle_, n_rows, index->dim());
+    auto indices   = make_device_vector<IdxT>(handle_, n_rows);
+    copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_);
+
+    ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_1.view(), label, 0);
+    ivf_pq::helpers::erase_list(handle_, index, label);
+    // NB: passing the type parameter because const->non-const implicit conversion of the mdspans
+    // breaks type inference
+    ivf_pq::helpers::extend_list<EvalT, IdxT>(
+      handle_, index, vectors_1.view(), indices.view(), label);
+
+    auto& new_list = index->lists()[label];
+    ASSERT_NE(old_list.get(), new_list.get())
+      << "The old list should have been shared and retained after ivf_pq index has erased the "
+         "corresponding cluster.";
+
+    auto vectors_2 = make_device_matrix<EvalT>(handle_, n_rows, index->dim());
+    ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_2.view(), label, 0);
+    // The code search is unstable, and there's a high chance of repeating values of the lvl-2
+    // codes. Hence, the encoding-decoding chain often leads to altering both the PQ codes and the
+    // reconstructed data.
+    compare_vectors_l2(
+      handle_, vectors_1.view(), vectors_2.view(), label, compression_ratio, 0.025);
+  }
+
+  void check_packing(index<IdxT>* index, uint32_t label)
+  {
+    auto old_list = index->lists()[label];
+    auto n_rows   = old_list->size.load();
+
+    if (n_rows == 0) { return; }
+
+    auto codes   = make_device_matrix<uint8_t>(handle_, n_rows, index->pq_dim());
+    auto indices = make_device_vector<IdxT>(handle_, n_rows);
+    copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_);
+
+    ivf_pq::helpers::unpack_list_data(handle_, *index, codes.view(), label, 0);
+    ivf_pq::helpers::erase_list(handle_, index, label);
+    ivf_pq::helpers::extend_list_with_codes<IdxT>(
+      handle_, index, codes.view(), indices.view(), label);
+
+    auto& new_list = index->lists()[label];
+    ASSERT_NE(old_list.get(), new_list.get())
+      << "The old list should have been shared and retained after ivf_pq index has erased the "
+         "corresponding cluster.";
+    auto list_data_size = (n_rows / ivf_pq::kIndexGroupSize) * new_list->data.extent(1) *
+                          new_list->data.extent(2) * new_list->data.extent(3);
+
+    ASSERT_TRUE(old_list->data.size() >= list_data_size);
+    ASSERT_TRUE(new_list->data.size() >= list_data_size);
+    ASSERT_TRUE(devArrMatch(old_list->data.data_handle(),
+                            new_list->data.data_handle(),
+                            list_data_size,
+                            Compare<uint8_t>{}));
+
+    // Pack a few vectors back to the list.
+    int row_offset = 9;
+    int n_vec      = 3;
+    ASSERT_TRUE(row_offset + n_vec < n_rows);
+    size_t offset      = row_offset * index->pq_dim();
+    auto codes_to_pack = make_device_matrix_view<const uint8_t, uint32_t>(
+      codes.data_handle() + offset, n_vec, index->pq_dim());
+    ivf_pq::helpers::pack_list_data(handle_, index, codes_to_pack, label, row_offset);
+    ASSERT_TRUE(devArrMatch(old_list->data.data_handle(),
+                            new_list->data.data_handle(),
+                            list_data_size,
+                            Compare<uint8_t>{}));
+
+    // Another test with the API that takes list_data directly
+    auto list_data  = index->lists()[label]->data.view();
+    uint32_t n_take = 4;
+    ASSERT_TRUE(row_offset + n_take < n_rows);
+    auto codes2 = raft::make_device_matrix<uint8_t>(handle_, n_take, index->pq_dim());
+    ivf_pq::helpers::codepacker::unpack(
+      handle_, list_data, index->pq_bits(), row_offset, codes2.view());
+
+    // Write it back
+    ivf_pq::helpers::codepacker::pack(
+      handle_, make_const_mdspan(codes2.view()), index->pq_bits(), row_offset, list_data);
+    ASSERT_TRUE(devArrMatch(old_list->data.data_handle(),
+                            new_list->data.data_handle(),
+                            list_data_size,
+                            Compare<uint8_t>{}));
+  }
+
   template <typename BuildIndex>
   void run(BuildIndex build_index)
   {
     index<IdxT> index = build_index();
 
+    double compression_ratio =
+      static_cast<double>(ps.dim * 8) / static_cast<double>(index.pq_dim() * index.pq_bits());
+
+    for (uint32_t label = 0; label < index.n_lists(); label++) {
+      switch (label % 3) {
+        case 0: {
+          // Reconstruct and re-write vectors for one label
+          check_reconstruct_extend(&index, compression_ratio, label);
+        } break;
+        case 1: {
+          // Dump and re-write codes for one label
+          check_packing(&index, label);
+        } break;
+        default: {
+          // check a small subset of data in this cluster to see if the data
+          // reconstruction works well.
+          check_reconstruction(index, compression_ratio, label, 100, 7);
+        }
+      }
+    }
+
     size_t queries_size = ps.num_queries * ps.k;
     std::vector<IdxT> indices_ivf_pq(queries_size);
     std::vector<EvalT> distances_ivf_pq(queries_size);
@@ -255,11 +424,9 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
     // A very conservative lower bound on recall
     double min_recall =
       static_cast<double>(ps.search_params.n_probes) / static_cast<double>(ps.index_params.n_lists);
-    double low_precision_factor =
-      static_cast<double>(ps.dim * 8) / static_cast<double>(index.pq_dim() * index.pq_bits());
     // Using a heuristic to lower the required recall due to code-packing errors
     min_recall =
-      std::min(std::erfc(0.05 * low_precision_factor / std::max(min_recall, 0.5)), min_recall);
+      std::min(std::erfc(0.05 * compression_ratio / std::max(min_recall, 0.5)), min_recall);
     // Use explicit per-test min recall value if provided.
     min_recall = ps.min_recall.value_or(min_recall);
 
@@ -269,7 +436,7 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
                                 distances_ivf_pq,
                                 ps.num_queries,
                                 ps.k,
-                                0.0001 * low_precision_factor,
+                                0.0001 * compression_ratio,
                                 min_recall))
       << ps;
 

From 574f8f8819465a8d03653f3cd6f66c342544cc32 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Mon, 17 Apr 2023 14:19:51 -0700
Subject: [PATCH 15/78] Add python bindings for matrix::select_k (#1422)

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1422
---
 cpp/CMakeLists.txt                            |   7 +-
 cpp/include/raft/matrix/select_k.cuh          |  20 +--
 cpp/include/raft_runtime/matrix/select_k.hpp  |  32 +++++
 .../raft_internal/matrix/select_k.cuh         |  13 +-
 cpp/src/matrix/select_k_float_int64_t.cu      |  37 +++++
 python/pylibraft/CMakeLists.txt               |   1 +
 .../pylibraft/pylibraft/matrix/CMakeLists.txt |  24 ++++
 .../pylibraft/pylibraft/matrix/__init__.pxd   |  14 ++
 python/pylibraft/pylibraft/matrix/__init__.py |  18 +++
 .../pylibraft/matrix/cpp/__init__.pxd         |   0
 .../pylibraft/matrix/cpp/__init__.py          |  14 ++
 .../pylibraft/matrix/cpp/select_k.pxd         |  39 +++++
 .../pylibraft/pylibraft/matrix/select_k.pyx   | 133 ++++++++++++++++++
 .../pylibraft/neighbors/brute_force.pyx       |   3 +-
 ...test_brue_force.py => test_brute_force.py} |   0
 .../pylibraft/pylibraft/test/test_doctests.py |   2 +
 .../pylibraft/pylibraft/test/test_select_k.py |  54 +++++++
 17 files changed, 389 insertions(+), 22 deletions(-)
 create mode 100644 cpp/include/raft_runtime/matrix/select_k.hpp
 create mode 100644 cpp/src/matrix/select_k_float_int64_t.cu
 create mode 100644 python/pylibraft/pylibraft/matrix/CMakeLists.txt
 create mode 100644 python/pylibraft/pylibraft/matrix/__init__.pxd
 create mode 100644 python/pylibraft/pylibraft/matrix/__init__.py
 create mode 100644 python/pylibraft/pylibraft/matrix/cpp/__init__.pxd
 create mode 100644 python/pylibraft/pylibraft/matrix/cpp/__init__.py
 create mode 100644 python/pylibraft/pylibraft/matrix/cpp/select_k.pxd
 create mode 100644 python/pylibraft/pylibraft/matrix/select_k.pyx
 rename python/pylibraft/pylibraft/test/{test_brue_force.py => test_brute_force.py} (100%)
 create mode 100644 python/pylibraft/pylibraft/test/test_select_k.py

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 6461492169..62f9ac604e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -70,13 +70,11 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations"
        ${RAFT_COMPILE_LIBRARY_DEFAULT}
 )
 
-
-# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs
-# to have different values for the `Threads::Threads` target. Setting this flag ensures
+# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
+# have different values for the `Threads::Threads` target. Setting this flag ensures
 # `Threads::Threads` is the same value across all builds so that cache hits occur
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 
-
 include(CMakeDependentOption)
 # cmake_dependent_option( RAFT_USE_FAISS_STATIC "Build and statically link the FAISS library for
 # nearest neighbors search on GPU" ON RAFT_COMPILE_LIBRARY OFF )
@@ -329,6 +327,7 @@ if(RAFT_COMPILE_LIBRARY)
     src/distance/specializations/fused_l2_nn_double_int64.cu
     src/distance/specializations/fused_l2_nn_float_int.cu
     src/distance/specializations/fused_l2_nn_float_int64.cu
+    src/matrix/select_k_float_int64_t.cu
     src/matrix/specializations/detail/select_k_float_uint32_t.cu
     src/matrix/specializations/detail/select_k_float_int64_t.cu
     src/matrix/specializations/detail/select_k_half_uint32_t.cu
diff --git a/cpp/include/raft/matrix/select_k.cuh b/cpp/include/raft/matrix/select_k.cuh
index 9a1a14fd73..7951cbdb03 100644
--- a/cpp/include/raft/matrix/select_k.cuh
+++ b/cpp/include/raft/matrix/select_k.cuh
@@ -42,13 +42,13 @@ namespace raft::matrix {
  * @code{.cpp}
  *   using namespace raft;
  *   // get a 2D row-major array of values to search through
- *   auto in_values = {... input device_matrix_view<const float, size_t, row_major> ...}
+ *   auto in_values = {... input device_matrix_view<const float, int64_t, row_major> ...}
  *   // prepare output arrays
- *   auto out_extents = make_extents<size_t>(in_values.extent(0), k);
+ *   auto out_extents = make_extents<int64_t>(in_values.extent(0), k);
  *   auto out_values  = make_device_mdarray<float>(handle, out_extents);
- *   auto out_indices = make_device_mdarray<size_t>(handle, out_extents);
+ *   auto out_indices = make_device_mdarray<int64_t>(handle, out_extents);
  *   // search `k` smallest values in each row
- *   matrix::select_k<float, size_t>(
+ *   matrix::select_k<float, int64_t>(
  *     handle, in_values, std::nullopt, out_values.view(), out_indices.view(), true);
  * @endcode
  *
@@ -76,13 +76,13 @@ namespace raft::matrix {
  */
 template <typename T, typename IdxT>
 void select_k(const device_resources& handle,
-              raft::device_matrix_view<const T, size_t, row_major> in_val,
-              std::optional<raft::device_matrix_view<const IdxT, size_t, row_major>> in_idx,
-              raft::device_matrix_view<T, size_t, row_major> out_val,
-              raft::device_matrix_view<IdxT, size_t, row_major> out_idx,
+              raft::device_matrix_view<const T, int64_t, row_major> in_val,
+              std::optional<raft::device_matrix_view<const IdxT, int64_t, row_major>> in_idx,
+              raft::device_matrix_view<T, int64_t, row_major> out_val,
+              raft::device_matrix_view<IdxT, int64_t, row_major> out_idx,
               bool select_min)
 {
-  RAFT_EXPECTS(out_val.extent(1) <= size_t(std::numeric_limits<int>::max()),
+  RAFT_EXPECTS(out_val.extent(1) <= int64_t(std::numeric_limits<int>::max()),
                "output k must fit the int type.");
   auto batch_size = in_val.extent(0);
   auto len        = in_val.extent(1);
@@ -93,7 +93,7 @@ void select_k(const device_resources& handle,
     RAFT_EXPECTS(batch_size == in_idx->extent(0), "batch sizes must be equal");
     RAFT_EXPECTS(len == in_idx->extent(1), "value and index input lengths must be equal");
   }
-  RAFT_EXPECTS(size_t(k) == out_idx.extent(1), "value and index output lengths must be equal");
+  RAFT_EXPECTS(int64_t(k) == out_idx.extent(1), "value and index output lengths must be equal");
   return detail::select_k<T, IdxT>(in_val.data_handle(),
                                    in_idx.has_value() ? in_idx->data_handle() : nullptr,
                                    batch_size,
diff --git a/cpp/include/raft_runtime/matrix/select_k.hpp b/cpp/include/raft_runtime/matrix/select_k.hpp
new file mode 100644
index 0000000000..08c0e01d0a
--- /dev/null
+++ b/cpp/include/raft_runtime/matrix/select_k.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
+
+#include <optional>
+
+namespace raft::runtime::matrix {
+void select_k(const device_resources& handle,
+              raft::device_matrix_view<const float, int64_t, row_major> in_val,
+              std::optional<raft::device_matrix_view<const int64_t, int64_t, row_major>> in_idx,
+              raft::device_matrix_view<float, int64_t, row_major> out_val,
+              raft::device_matrix_view<int64_t, int64_t, row_major> out_idx,
+              bool select_min);
+
+}  // namespace raft::runtime::matrix
diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh
index 188122c9b4..a3535f8ffd 100644
--- a/cpp/internal/raft_internal/matrix/select_k.cuh
+++ b/cpp/internal/raft_internal/matrix/select_k.cuh
@@ -91,12 +91,13 @@ void select_k_impl(const device_resources& handle,
   auto stream = handle.get_stream();
   switch (algo) {
     case Algo::kPublicApi: {
-      auto in_extent   = make_extents<size_t>(batch_size, len);
-      auto out_extent  = make_extents<size_t>(batch_size, k);
-      auto in_span     = make_mdspan<const T, size_t, row_major, false, true>(in, in_extent);
-      auto in_idx_span = make_mdspan<const IdxT, size_t, row_major, false, true>(in_idx, in_extent);
-      auto out_span    = make_mdspan<T, size_t, row_major, false, true>(out, out_extent);
-      auto out_idx_span = make_mdspan<IdxT, size_t, row_major, false, true>(out_idx, out_extent);
+      auto in_extent  = make_extents<int64_t>(batch_size, len);
+      auto out_extent = make_extents<int64_t>(batch_size, k);
+      auto in_span    = make_mdspan<const T, int64_t, row_major, false, true>(in, in_extent);
+      auto in_idx_span =
+        make_mdspan<const IdxT, int64_t, row_major, false, true>(in_idx, in_extent);
+      auto out_span     = make_mdspan<T, int64_t, row_major, false, true>(out, out_extent);
+      auto out_idx_span = make_mdspan<IdxT, int64_t, row_major, false, true>(out_idx, out_extent);
       if (in_idx == nullptr) {
         // NB: std::nullopt prevents automatic inference of the template parameters.
         return matrix::select_k<T, IdxT>(
diff --git a/cpp/src/matrix/select_k_float_int64_t.cu b/cpp/src/matrix/select_k_float_int64_t.cu
new file mode 100644
index 0000000000..309ac50c6b
--- /dev/null
+++ b/cpp/src/matrix/select_k_float_int64_t.cu
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/matrix/select_k.cuh>
+#include <raft/matrix/specializations.cuh>
+
+#include <raft_runtime/matrix/select_k.hpp>
+
+#include <vector>
+
+namespace raft::runtime::matrix {
+
+void select_k(const device_resources& handle,
+              raft::device_matrix_view<const float, int64_t, row_major> in_val,
+              std::optional<raft::device_matrix_view<const int64_t, int64_t, row_major>> in_idx,
+              raft::device_matrix_view<float, int64_t, row_major> out_val,
+              raft::device_matrix_view<int64_t, int64_t, row_major> out_idx,
+              bool select_min)
+{
+  raft::matrix::select_k(handle, in_val, in_idx, out_val, out_idx, select_min);
+}
+}  // namespace raft::runtime::matrix
diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt
index 349a2b08ba..069bd98222 100644
--- a/python/pylibraft/CMakeLists.txt
+++ b/python/pylibraft/CMakeLists.txt
@@ -86,6 +86,7 @@ rapids_cython_init()
 
 add_subdirectory(pylibraft/common)
 add_subdirectory(pylibraft/distance)
+add_subdirectory(pylibraft/matrix)
 add_subdirectory(pylibraft/neighbors)
 add_subdirectory(pylibraft/random)
 add_subdirectory(pylibraft/cluster)
diff --git a/python/pylibraft/pylibraft/matrix/CMakeLists.txt b/python/pylibraft/pylibraft/matrix/CMakeLists.txt
new file mode 100644
index 0000000000..ffba10dea9
--- /dev/null
+++ b/python/pylibraft/pylibraft/matrix/CMakeLists.txt
@@ -0,0 +1,24 @@
+# =============================================================================
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# Set the list of Cython files to build
+set(cython_sources select_k.pyx)
+set(linked_libraries raft::raft raft::compiled)
+
+# Build all of the Cython targets
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES "${cython_sources}"
+  LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX matrix_
+)
diff --git a/python/pylibraft/pylibraft/matrix/__init__.pxd b/python/pylibraft/pylibraft/matrix/__init__.pxd
new file mode 100644
index 0000000000..a7e7b75096
--- /dev/null
+++ b/python/pylibraft/pylibraft/matrix/__init__.pxd
@@ -0,0 +1,14 @@
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/python/pylibraft/pylibraft/matrix/__init__.py b/python/pylibraft/pylibraft/matrix/__init__.py
new file mode 100644
index 0000000000..5eb35795ed
--- /dev/null
+++ b/python/pylibraft/pylibraft/matrix/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from .select_k import select_k
+
+__all__ = ["select_k"]
diff --git a/python/pylibraft/pylibraft/matrix/cpp/__init__.pxd b/python/pylibraft/pylibraft/matrix/cpp/__init__.pxd
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/pylibraft/pylibraft/matrix/cpp/__init__.py b/python/pylibraft/pylibraft/matrix/cpp/__init__.py
new file mode 100644
index 0000000000..8f2cc34855
--- /dev/null
+++ b/python/pylibraft/pylibraft/matrix/cpp/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/python/pylibraft/pylibraft/matrix/cpp/select_k.pxd b/python/pylibraft/pylibraft/matrix/cpp/select_k.pxd
new file mode 100644
index 0000000000..ab466fdce6
--- /dev/null
+++ b/python/pylibraft/pylibraft/matrix/cpp/select_k.pxd
@@ -0,0 +1,39 @@
+#
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+from libc.stdint cimport int64_t
+from libcpp cimport bool
+
+from pylibraft.common.cpp.mdspan cimport device_matrix_view, row_major
+from pylibraft.common.cpp.optional cimport optional
+from pylibraft.common.handle cimport device_resources
+
+
+cdef extern from "raft_runtime/matrix/select_k.hpp" \
+        namespace "raft::runtime::matrix" nogil:
+
+    cdef void select_k(const device_resources & handle,
+                       device_matrix_view[float, int64_t, row_major],
+                       optional[device_matrix_view[int64_t,
+                                                   int64_t,
+                                                   row_major]],
+                       device_matrix_view[float, int64_t, row_major],
+                       device_matrix_view[int64_t, int64_t, row_major],
+                       bool) except +
diff --git a/python/pylibraft/pylibraft/matrix/select_k.pyx b/python/pylibraft/pylibraft/matrix/select_k.pyx
new file mode 100644
index 0000000000..fbb1e2e5d3
--- /dev/null
+++ b/python/pylibraft/pylibraft/matrix/select_k.pyx
@@ -0,0 +1,133 @@
+#
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+from cython.operator cimport dereference as deref
+from libc.stdint cimport int64_t
+from libcpp cimport bool
+
+import numpy as np
+
+from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray
+from pylibraft.common.handle import auto_sync_handle
+from pylibraft.common.input_validation import is_c_contiguous
+
+from pylibraft.common.cpp.mdspan cimport (
+    device_matrix_view,
+    host_matrix_view,
+    make_device_matrix_view,
+    make_host_matrix_view,
+    row_major,
+)
+from pylibraft.common.cpp.optional cimport optional
+from pylibraft.common.handle cimport device_resources
+from pylibraft.common.mdspan cimport get_dmv_float, get_dmv_int64
+from pylibraft.matrix.cpp.select_k cimport select_k as c_select_k
+
+
+@auto_sync_handle
+@auto_convert_output
+def select_k(dataset, k=None, distances=None, indices=None, select_min=True,
+             handle=None):
+    """
+    Selects the top k items from each row in a matrix
+
+
+    Parameters
+    ----------
+    dataset : array interface compliant matrix, row-major layout,
+        shape (n_rows, dim). Supported dtype [float]
+    k : int
+        Number of items to return for each row.  Optional if indices or
+        distances arrays are given (in which case their second dimension
+        is k).
+    distances :  Optional array interface compliant matrix shape
+                (n_rows, k), dtype float. If supplied,
+                distances will be written here in-place. (default None)
+    indices :  Optional array interface compliant matrix shape
+                (n_rows, k), dtype int64_t. If supplied, neighbor
+                indices will be written here in-place. (default None)
+    select_min : bool
+        If True (default), select the k smallest items, else the k largest
+
+    {handle_docstring}
+
+    Returns
+    -------
+    distances: array interface compliant object containing resulting distances
+               shape (n_rows, k)
+
+    indices: array interface compliant object containing resulting indices
+             shape (n_rows, k)
+
+    Examples
+    --------
+
+    >>> import cupy as cp
+
+    >>> from pylibraft.matrix import select_k
+
+    >>> n_features = 50
+    >>> n_rows = 1000
+
+    >>> queries = cp.random.random_sample((n_rows, n_features),
+    ...                                   dtype=cp.float32)
+    >>> k = 40
+    >>> distances, ids = select_k(queries, k)
+    >>> distances = cp.asarray(distances)
+    >>> ids = cp.asarray(ids)
+    """
+
+    dataset_cai = cai_wrapper(dataset)
+
+    if k is None:
+        if indices is not None:
+            k = cai_wrapper(indices).shape[1]
+        elif distances is not None:
+            k = cai_wrapper(distances).shape[1]
+        else:
+            raise ValueError("Argument k must be specified if both indices "
+                             "and distances arg is None")
+    # read the shape via the wrapper: the raw input may not expose `.shape`
+    n_rows = dataset_cai.shape[0]
+    if indices is None:
+        indices = device_ndarray.empty((n_rows, k), dtype='int64')
+
+    if distances is None:
+        distances = device_ndarray.empty((n_rows, k), dtype='float32')
+
+    distances_cai = cai_wrapper(distances)
+    indices_cai = cai_wrapper(indices)
+
+    cdef device_resources* handle_ = \
+        <device_resources*><size_t>handle.getHandle()
+    # in_idx is never assigned, so it is passed to C++ default-constructed
+    cdef optional[device_matrix_view[int64_t, int64_t, row_major]] in_idx
+
+    if dataset_cai.dtype == np.float32:
+        c_select_k(deref(handle_),
+                   get_dmv_float(dataset_cai, check_shape=True),
+                   in_idx,
+                   get_dmv_float(distances_cai, check_shape=True),
+                   get_dmv_int64(indices_cai, check_shape=True),
+                   <bool>select_min)
+    else:
+        raise TypeError("dtype %s not supported" % dataset_cai.dtype)
+
+    return distances, indices
diff --git a/python/pylibraft/pylibraft/neighbors/brute_force.pyx b/python/pylibraft/pylibraft/neighbors/brute_force.pyx
index dbd888756d..8836307a5a 100644
--- a/python/pylibraft/pylibraft/neighbors/brute_force.pyx
+++ b/python/pylibraft/pylibraft/neighbors/brute_force.pyx
@@ -40,7 +40,6 @@ from pylibraft.common.handle cimport device_resources
 from pylibraft.common.mdspan cimport get_dmv_float, get_dmv_int64
 
 from pylibraft.common.handle import auto_sync_handle
-from pylibraft.common.input_validation import is_c_contiguous
 from pylibraft.common.interruptible import cuda_interruptible
 
 from pylibraft.distance.distance_type cimport DistanceType
@@ -144,7 +143,7 @@ def knn(dataset, queries, k=None, indices=None, distances=None,
             raise ValueError("Argument k must be specified if both indices "
                              "and distances arg is None")
 
-    n_queries = cai_wrapper(queries).shape[0]
+    n_queries = queries_cai.shape[0]
 
     if indices is None:
         indices = device_ndarray.empty((n_queries, k), dtype='int64')
diff --git a/python/pylibraft/pylibraft/test/test_brue_force.py b/python/pylibraft/pylibraft/test/test_brute_force.py
similarity index 100%
rename from python/pylibraft/pylibraft/test/test_brue_force.py
rename to python/pylibraft/pylibraft/test/test_brute_force.py
diff --git a/python/pylibraft/pylibraft/test/test_doctests.py b/python/pylibraft/pylibraft/test/test_doctests.py
index 34be6c55f5..19e5c5c22f 100644
--- a/python/pylibraft/pylibraft/test/test_doctests.py
+++ b/python/pylibraft/pylibraft/test/test_doctests.py
@@ -22,6 +22,7 @@
 
 import pylibraft.cluster
 import pylibraft.distance
+import pylibraft.matrix
 import pylibraft.neighbors
 import pylibraft.random
 
@@ -94,6 +95,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None):
 DOC_STRINGS = list(_find_doctests_in_obj(pylibraft.cluster))
 DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.common))
 DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.distance))
+DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.matrix.select_k))
 DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors))
 DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_pq))
 DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.brute_force))
diff --git a/python/pylibraft/pylibraft/test/test_select_k.py b/python/pylibraft/pylibraft/test/test_select_k.py
new file mode 100644
index 0000000000..203e735b9c
--- /dev/null
+++ b/python/pylibraft/pylibraft/test/test_select_k.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import pytest
+
+from pylibraft.common import device_ndarray
+from pylibraft.matrix import select_k
+
+
+@pytest.mark.parametrize("n_rows", [32, 100])
+@pytest.mark.parametrize("n_cols", [40, 100])
+@pytest.mark.parametrize("k", [1, 5, 16, 35])
+@pytest.mark.parametrize("inplace", [True, False])
+def test_select_k(n_rows, n_cols, k, inplace):
+    dataset = np.random.random_sample((n_rows, n_cols)).astype("float32")
+    dataset_device = device_ndarray(dataset)
+
+    indices = np.zeros((n_rows, k), dtype="int64")
+    distances = np.zeros((n_rows, k), dtype="float32")
+    indices_device = device_ndarray(indices)
+    distances_device = device_ndarray(distances)
+
+    ret_distances, ret_indices = select_k(
+        dataset_device,
+        k=k,
+        distances=distances_device,
+        indices=indices_device,
+    )
+
+    distances_device = ret_distances if not inplace else distances_device
+    actual_distances = distances_device.copy_to_host()
+    argsort = np.argsort(dataset, axis=1)
+
+    for i in range(dataset.shape[0]):
+        expected_indices = argsort[i]
+        gpu_dists = actual_distances[i]
+
+        cpu_ordered = dataset[i, expected_indices]
+        np.testing.assert_allclose(
+            cpu_ordered[:k], gpu_dists, atol=1e-4, rtol=1e-4
+        )

From ff58b8bbbf5ecdd7b78a628191ca0a15d3d62767 Mon Sep 17 00:00:00 2001
From: Tamas Bela Feher <tfeher@nvidia.com>
Date: Tue, 18 Apr 2023 17:06:50 +0200
Subject: [PATCH 16/78] Fix dim param for IVF-PQ wrapper in ANN bench (#1427)

The `index_` is not yet initialized. To construct the dataset view, we need to use the `dim_` variable which was set in the constructor.

Authors:
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1427
---
 cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
index 70dff81847..517272e6cf 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
@@ -42,6 +42,7 @@ template <typename T, typename IdxT>
 class RaftIvfPQ : public ANN<T> {
  public:
   using typename ANN<T>::AnnSearchParam;
+  using ANN<T>::dim_;
 
   struct SearchParam : public AnnSearchParam {
     raft::neighbors::ivf_pq::search_params pq_param;
@@ -118,7 +119,7 @@ void RaftIvfPQ<T, IdxT>::load(const std::string& file)
 template <typename T, typename IdxT>
 void RaftIvfPQ<T, IdxT>::build(const T* dataset, size_t nrow, cudaStream_t)
 {
-  auto dataset_v = raft::make_device_matrix_view<const T, IdxT>(dataset, IdxT(nrow), index_->dim());
+  auto dataset_v = raft::make_device_matrix_view<const T, IdxT>(dataset, IdxT(nrow), dim_);
 
   index_.emplace(raft::runtime::neighbors::ivf_pq::build(handle_, index_params_, dataset_v));
   return;

From 5d68c5742be5d895f5650f42bdd3c27348ec56eb Mon Sep 17 00:00:00 2001
From: Sevag H <shanssian@nvidia.com>
Date: Tue, 18 Apr 2023 12:14:52 -0400
Subject: [PATCH 17/78] Remove wheel pytest verbosity (#1424)

This PR removes the verbose flag from wheel pytest commands

Authors:
  - Sevag H (https://github.com/sevagh)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1424
---
 .github/workflows/pr.yaml   | 4 ++--
 .github/workflows/test.yaml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index fc8c8d516e..c51d5c0a34 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -83,7 +83,7 @@ jobs:
       test-before-amd64: "pip install cupy-cuda11x"
       # On arm also need to install cupy from the specific webpage.
       test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64"
-      test-unittest: "python -m pytest -v ./python/pylibraft/pylibraft/test"
+      test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
       test-smoketest: "python ./ci/wheel_smoke_test_pylibraft.py"
   wheel-build-raft-dask:
     needs: wheel-tests-pylibraft
@@ -105,5 +105,5 @@ jobs:
       # Always want to test against latest dask/distributed.
       test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
-      test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test"
+      test-unittest: "python -m pytest ./python/raft-dask/raft_dask/test"
       test-smoketest: "python ./ci/wheel_smoke_test_raft_dask.py"
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index dc8f7b6f2b..05e96a6dff 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -41,7 +41,7 @@ jobs:
       package-name: pylibraft
       test-before-amd64: "pip install cupy-cuda11x"
       test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64"
-      test-unittest: "python -m pytest -v ./python/pylibraft/pylibraft/test"
+      test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
   wheel-tests-raft-dask:
     secrets: inherit
     uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
@@ -53,4 +53,4 @@ jobs:
       package-name: raft_dask
       test-before-amd64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       test-before-arm64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
-      test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test"
+      test-unittest: "python -m pytest ./python/raft-dask/raft_dask/test"

From b1939564ed3d38095efe4cdc3049c9acf05624fd Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 18 Apr 2023 14:10:12 -0400
Subject: [PATCH 18/78] Removing cuda stream view include from mdarray (#1429)

cc @wphicks

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/raft/pull/1429
---
 cpp/include/raft/core/mdarray.hpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp
index 61c1b500e6..88f90485dd 100644
--- a/cpp/include/raft/core/mdarray.hpp
+++ b/cpp/include/raft/core/mdarray.hpp
@@ -29,7 +29,6 @@
 #include <raft/core/mdspan.hpp>
 #include <raft/core/mdspan_types.hpp>
 #include <raft/core/memory_type.hpp>
-#include <rmm/cuda_stream_view.hpp>
 
 namespace raft {
 /**
@@ -45,11 +44,11 @@ namespace raft {
 template <typename Base>
 class array_interface {
   /**
-   * @brief Get a mdspan that can be passed down to CUDA kernels.
+   * @brief Get an mdspan
    */
   auto view() noexcept { return static_cast<Base*>(this)->view(); }
   /**
-   * @brief Get a mdspan that can be passed down to CUDA kernels.
+   * @brief Get an mdspan<const T>
    */
   auto view() const noexcept { return static_cast<Base*>(this)->view(); }
 };
@@ -108,7 +107,8 @@ inline constexpr bool is_array_interface_v = is_array_interface<Tn...>::value;
  *   template.
  *
  * - Most of the constructors from the reference implementation is removed to make sure
- *   CUDA stream is honorred.
+ *   CUDA stream is honored. Note that this class is not coupled to CUDA, so the
+ *   stream will only be used in the case where the device variant is used.
  *
  * - unique_size is not implemented, which is still working in progress in the proposal
  *
@@ -220,11 +220,11 @@ class mdarray
 #undef RAFT_MDARRAY_CTOR_CONSTEXPR
 
   /**
-   * @brief Get a mdspan that can be passed down to CUDA kernels.
+   * @brief Get an mdspan
    */
   auto view() noexcept { return view_type(c_.data(), map_, cp_.make_accessor_policy()); }
   /**
-   * @brief Get a mdspan that can be passed down to CUDA kernels.
+   * @brief Get an mdspan<const T>
    */
   auto view() const noexcept
   {

From 6b021f5a562a52a0488c8393f38a7c50af81ba18 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 19 Apr 2023 12:50:13 -0700
Subject: [PATCH 19/78] Use nvtx3 includes. (#1431)

This PR updates raft to use `#include <nvtx3/nvToolsExt.h>` instead of `#include <nvToolsExt.h>`. This ensures we fetch the header-only NVTX v3. See NVTX docs for more information: https://nvidia.github.io/NVTX/#c-and-c

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/raft/pull/1431
---
 cpp/bench/ann/src/common/benchmark.hpp | 2 +-
 cpp/include/raft/core/detail/nvtx.hpp  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
index b4d8fbeee3..c34b95010f 100644
--- a/cpp/bench/ann/src/common/benchmark.hpp
+++ b/cpp/bench/ann/src/common/benchmark.hpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #ifdef NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
 #endif
 #include <unistd.h>
 
diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp
index 4a16ec81bd..adbf3a3666 100644
--- a/cpp/include/raft/core/detail/nvtx.hpp
+++ b/cpp/include/raft/core/detail/nvtx.hpp
@@ -25,7 +25,7 @@ namespace raft::common::nvtx::detail {
 #include <cstdint>
 #include <cstdlib>
 #include <mutex>
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
 #include <string>
 #include <type_traits>
 #include <unordered_map>

From fa51c47f075d78526467f947aa5dc00b781e391a Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Wed, 19 Apr 2023 14:56:48 -0700
Subject: [PATCH 20/78] Remove MetricProcessor code from brute_force::knn
 (#1426)

Stop using the MetricProcessor code to preprocess the inputs to the bfknn calls. Since the pairwise distance API supports both cosine and correlation distance, this wasn't required anymore - and it introduced NaN values to the input when passed a dataset with one of the rows being all zero.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1426
---
 .../raft/neighbors/detail/knn_brute_force.cuh | 91 ++++++++++---------
 .../pylibraft/test/test_brute_force.py        |  3 -
 2 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
index a776ce2586..b3c4818e70 100644
--- a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
+++ b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
@@ -56,7 +56,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
                            size_t m,
                            size_t n,
                            size_t d,
-                           int k,
+                           size_t k,
                            ElementType* distances,  // size (m, k)
                            IndexType* indices,      // size (m, k)
                            raft::distance::DistanceType metric,
@@ -79,7 +79,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
   if (max_col_tile_size && (tile_cols > max_col_tile_size)) { tile_cols = max_col_tile_size; }
 
   // tile_cols must be at least k items
-  tile_cols = std::max(tile_cols, static_cast<size_t>(k));
+  tile_cols = std::max(tile_cols, k);
 
   // stores pairwise distances for the current tile
   rmm::device_uvector<ElementType> temp_distances(tile_rows * tile_cols, stream);
@@ -90,13 +90,34 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
   rmm::device_uvector<ElementType> search_norms(0, stream);
   rmm::device_uvector<ElementType> index_norms(0, stream);
   if (metric == raft::distance::DistanceType::L2Expanded ||
-      metric == raft::distance::DistanceType::L2SqrtExpanded) {
+      metric == raft::distance::DistanceType::L2SqrtExpanded ||
+      metric == raft::distance::DistanceType::CosineExpanded) {
     search_norms.resize(m, stream);
     index_norms.resize(n, stream);
-    raft::linalg::rowNorm(
-      search_norms.data(), search, d, m, raft::linalg::NormType::L2Norm, true, stream);
-    raft::linalg::rowNorm(
-      index_norms.data(), index, d, n, raft::linalg::NormType::L2Norm, true, stream);
+    // cosine needs the L2 norm, whereas the L2 distances need the squared norm
+    if (metric == raft::distance::DistanceType::CosineExpanded) {
+      raft::linalg::rowNorm(search_norms.data(),
+                            search,
+                            d,
+                            m,
+                            raft::linalg::NormType::L2Norm,
+                            true,
+                            stream,
+                            raft::sqrt_op{});
+      raft::linalg::rowNorm(index_norms.data(),
+                            index,
+                            d,
+                            n,
+                            raft::linalg::NormType::L2Norm,
+                            true,
+                            stream,
+                            raft::sqrt_op{});
+    } else {
+      raft::linalg::rowNorm(
+        search_norms.data(), search, d, m, raft::linalg::NormType::L2Norm, true, stream);
+      raft::linalg::rowNorm(
+        index_norms.data(), index, d, n, raft::linalg::NormType::L2Norm, true, stream);
+    }
     pairwise_metric = raft::distance::DistanceType::InnerProduct;
   }
 
@@ -109,20 +130,17 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
   // in which case the number of columns here is too high in the temp output.
   // adjust if necessary
   auto last_col_tile_size = n % tile_cols;
-  if (last_col_tile_size && (last_col_tile_size < static_cast<size_t>(k))) {
-    temp_out_cols -= k - last_col_tile_size;
-  }
+  if (last_col_tile_size && (last_col_tile_size < k)) { temp_out_cols -= k - last_col_tile_size; }
 
   // if we have less than k items in the index, we should fill out the result
   // to indicate that we are missing items (and match behaviour in faiss)
-  if (n < static_cast<size_t>(k)) {
+  if (n < k) {
     raft::matrix::fill(handle,
-                       raft::make_device_matrix_view(distances, m, static_cast<size_t>(k)),
+                       raft::make_device_matrix_view(distances, m, k),
                        std::numeric_limits<ElementType>::lowest());
 
     if constexpr (std::is_signed_v<IndexType>) {
-      raft::matrix::fill(
-        handle, raft::make_device_matrix_view(indices, m, static_cast<size_t>(k)), IndexType{-1});
+      raft::matrix::fill(handle, raft::make_device_matrix_view(indices, m, k), IndexType{-1});
     }
   }
 
@@ -136,7 +154,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
 
     for (size_t j = 0; j < n; j += tile_cols) {
       size_t current_centroid_size = std::min(tile_cols, n - j);
-      size_t current_k             = std::min(current_centroid_size, static_cast<size_t>(k));
+      size_t current_k             = std::min(current_centroid_size, k);
 
       // calculate the top-k elements for the current tile, by calculating the
       // full pairwise distance for the tile - and then selecting the top-k from that
@@ -176,6 +194,21 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
             val = distance_epilogue(val, row, col);
             return val;
           });
+      } else if (metric == raft::distance::DistanceType::CosineExpanded) {
+        auto row_norms = search_norms.data();
+        auto col_norms = index_norms.data();
+        auto dist      = temp_distances.data();
+
+        raft::linalg::map_offset(
+          handle,
+          raft::make_device_vector_view(dist, current_query_size * current_centroid_size),
+          [=] __device__(IndexType idx) {
+            IndexType row = i + (idx / current_centroid_size);
+            IndexType col = j + (idx % current_centroid_size);
+            auto val      = 1.0 - dist[idx] / (row_norms[row] * col_norms[col]);
+            val           = distance_epilogue(val, row, col);
+            return val;
+          });
       } else {
         // if we're not l2 distance, and we have a distance epilogue - run it now
         if constexpr (!std::is_same_v<DistanceEpilogue, raft::identity_op>) {
@@ -310,18 +343,6 @@ void brute_force_knn_impl(
     id_ranges = translations;
   }
 
-  // perform preprocessing
-  std::unique_ptr<MetricProcessor<value_t>> query_metric_processor =
-    create_processor<value_t>(metric, n, D, k, rowMajorQuery, userStream);
-  query_metric_processor->preprocess(search_items);
-
-  std::vector<std::unique_ptr<MetricProcessor<value_t>>> metric_processors(input.size());
-  for (size_t i = 0; i < input.size(); i++) {
-    metric_processors[i] =
-      create_processor<value_t>(metric, sizes[i], D, k, rowMajorQuery, userStream);
-    metric_processors[i]->preprocess(input[i]);
-  }
-
   int device;
   RAFT_CUDA_TRY(cudaGetDevice(&device));
 
@@ -430,14 +451,6 @@ void brute_force_knn_impl(
             raft::linalg::transpose(handle, input[i], index, sizes[i], D, stream);
           }
 
-          // cosine/correlation are handled by metric processor, use IP distance
-          // for brute force knn call.
-          auto tiled_metric = metric;
-          if (metric == raft::distance::DistanceType::CosineExpanded ||
-              metric == raft::distance::DistanceType::CorrelationExpanded) {
-            tiled_metric = raft::distance::DistanceType::InnerProduct;
-          }
-
           tiled_brute_force_knn<value_t, IdxType>(stream_pool_handle,
                                                   search,
                                                   index,
@@ -447,7 +460,7 @@ void brute_force_knn_impl(
                                                   k,
                                                   out_d_ptr,
                                                   out_i_ptr,
-                                                  tiled_metric,
+                                                  metric,
                                                   metricArg,
                                                   0,
                                                   0,
@@ -470,12 +483,6 @@ void brute_force_knn_impl(
     knn_merge_parts(out_D, out_I, res_D, res_I, n, input.size(), k, userStream, trans.data());
   }
 
-  query_metric_processor->revert(search_items);
-  query_metric_processor->postprocess(out_D);
-  for (size_t i = 0; i < input.size(); i++) {
-    metric_processors[i]->revert(input[i]);
-  }
-
   if (translations == nullptr) delete id_ranges;
 };
 
diff --git a/python/pylibraft/pylibraft/test/test_brute_force.py b/python/pylibraft/pylibraft/test/test_brute_force.py
index f349be892d..0bd5e6eaaf 100644
--- a/python/pylibraft/pylibraft/test/test_brute_force.py
+++ b/python/pylibraft/pylibraft/test/test_brute_force.py
@@ -90,9 +90,6 @@ def test_knn(
         expected_indices = argsort[i]
         gpu_dists = actual_distances[i]
 
-        if metric == "correlation" or metric == "cosine":
-            gpu_dists = gpu_dists[::-1]
-
         cpu_ordered = pw_dists[i, expected_indices]
         np.testing.assert_allclose(
             cpu_ordered[:k], gpu_dists, atol=1e-4, rtol=1e-4

From 6105f0e4d326f673447efb9576cc8adc0d1f9caa Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Wed, 19 Apr 2023 18:41:14 -0400
Subject: [PATCH 21/78] Minor Updates to Sparse Structures (#1432)

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1432
---
 cpp/include/raft/core/coo_matrix.hpp        | 35 ++++++++++++--------
 cpp/include/raft/core/csr_matrix.hpp        | 26 +++++++++------
 cpp/include/raft/core/device_coo_matrix.hpp | 36 +++++++++------------
 cpp/include/raft/core/device_csr_matrix.hpp | 31 ++++++++----------
 cpp/include/raft/core/host_coo_matrix.hpp   | 33 +++++++++----------
 cpp/include/raft/core/host_csr_matrix.hpp   | 33 +++++++++----------
 cpp/include/raft/core/sparse_types.hpp      | 17 +++++-----
 7 files changed, 109 insertions(+), 102 deletions(-)

diff --git a/cpp/include/raft/core/coo_matrix.hpp b/cpp/include/raft/core/coo_matrix.hpp
index efab8a1601..a5f7c05493 100644
--- a/cpp/include/raft/core/coo_matrix.hpp
+++ b/cpp/include/raft/core/coo_matrix.hpp
@@ -71,12 +71,6 @@ class coordinate_structure_view
   {
   }
 
-  /**
-   * Create a view from this view. Note that this is for interface compatibility
-   * @return
-   */
-  view_type view() { return view_type(rows_, cols_, this->get_n_rows(), this->get_n_cols()); }
-
   /**
    * Return span containing underlying rows array
    * @return span containing underlying rows array
@@ -209,6 +203,10 @@ class coo_matrix_view
                               coordinate_structure_view<RowType, ColType, NZType, is_device>,
                               is_device> {
  public:
+  using element_type = ElementType;
+  using row_type     = RowType;
+  using col_type     = ColType;
+  using nnz_type     = NZType;
   coo_matrix_view(raft::span<ElementType, is_device> element_span,
                   coordinate_structure_view<RowType, ColType, NZType, is_device> structure_view)
     : sparse_matrix_view<ElementType,
@@ -238,6 +236,9 @@ class coo_matrix
                          ContainerPolicy> {
  public:
   using element_type        = ElementType;
+  using row_type            = RowType;
+  using col_type            = ColType;
+  using nnz_type            = NZType;
   using structure_view_type = typename structure_type::view_type;
   using container_type      = typename ContainerPolicy<ElementType>::container_type;
   using sparse_matrix_type =
@@ -258,14 +259,9 @@ class coo_matrix
   // Constructor that owns the data but not the structure
   template <SparsityType sparsity_type_ = get_sparsity_type(),
             typename = typename std::enable_if_t<sparsity_type_ == SparsityType::PRESERVING>>
-  coo_matrix(raft::resources const& handle, std::shared_ptr<structure_type> structure) noexcept(
+  coo_matrix(raft::resources const& handle, structure_type structure) noexcept(
     std::is_nothrow_default_constructible_v<container_type>)
     : sparse_matrix_type(handle, structure){};
-  /**
-   * Return a view of the structure underlying this matrix
-   * @return
-   */
-  structure_view_type structure_view() { return this->structure_.get()->view(); }
 
   /**
    * Initialize the sparsity on this instance if it was not known upon construction
@@ -277,7 +273,20 @@ class coo_matrix
   void initialize_sparsity(NZType nnz)
   {
     sparse_matrix_type::initialize_sparsity(nnz);
-    this->structure_.get()->initialize_sparsity(nnz);
+    this->structure_.initialize_sparsity(nnz);
+  }
+
+  /**
+   * Return a view of the structure underlying this matrix
+   * @return
+   */
+  structure_view_type structure_view()
+  {
+    if constexpr (get_sparsity_type() == SparsityType::OWNING) {
+      return this->structure_.view();
+    } else {
+      return this->structure_;
+    }
   }
 };
 }  // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/core/csr_matrix.hpp b/cpp/include/raft/core/csr_matrix.hpp
index fac656b3f9..c37cfa41c8 100644
--- a/cpp/include/raft/core/csr_matrix.hpp
+++ b/cpp/include/raft/core/csr_matrix.hpp
@@ -87,12 +87,6 @@ class compressed_structure_view
    */
   span<indices_type, is_device> get_indices() override { return indices_; }
 
-  /**
-   * Create a view from this view. Note that this is for interface compatibility
-   * @return
-   */
-  view_type view() { return view_type(indptr_, indices_, this->get_n_cols()); }
-
  protected:
   raft::span<indptr_type, is_device> indptr_;
   raft::span<indices_type, is_device> indices_;
@@ -221,6 +215,10 @@ class csr_matrix_view
                               compressed_structure_view<IndptrType, IndicesType, NZType, is_device>,
                               is_device> {
  public:
+  using element_type = ElementType;
+  using indptr_type  = IndptrType;
+  using indices_type = IndicesType;
+  using nnz_type     = NZType;
   csr_matrix_view(
     raft::span<ElementType, is_device> element_span,
     compressed_structure_view<IndptrType, IndicesType, NZType, is_device> structure_view)
@@ -249,6 +247,9 @@ class csr_matrix
                          ContainerPolicy> {
  public:
   using element_type        = ElementType;
+  using indptr_type         = IndptrType;
+  using indices_type        = IndicesType;
+  using nnz_type            = NZType;
   using structure_view_type = typename structure_type::view_type;
   static constexpr auto get_sparsity_type() { return sparsity_type; }
   using sparse_matrix_type =
@@ -271,7 +272,7 @@ class csr_matrix
 
   template <SparsityType sparsity_type_ = get_sparsity_type(),
             typename = typename std::enable_if_t<sparsity_type_ == SparsityType::PRESERVING>>
-  csr_matrix(raft::resources const& handle, std::shared_ptr<structure_type> structure) noexcept(
+  csr_matrix(raft::resources const& handle, structure_type structure) noexcept(
     std::is_nothrow_default_constructible_v<container_type>)
     : sparse_matrix_type(handle, structure){};
 
@@ -284,13 +285,20 @@ class csr_matrix
   void initialize_sparsity(NZType nnz)
   {
     sparse_matrix_type::initialize_sparsity(nnz);
-    this->structure_.get()->initialize_sparsity(nnz);
+    this->structure_.initialize_sparsity(nnz);
   }
 
   /**
    * Return a view of the structure underlying this matrix
    * @return
    */
-  structure_view_type structure_view() { return this->structure_.get()->view(); }
+  structure_view_type structure_view()
+  {
+    if constexpr (get_sparsity_type() == SparsityType::OWNING) {
+      return this->structure_.view();
+    } else {
+      return this->structure_;
+    }
+  }
 };
 }  // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp
index b1e9ca30fc..35be67431d 100644
--- a/cpp/include/raft/core/device_coo_matrix.hpp
+++ b/cpp/include/raft/core/device_coo_matrix.hpp
@@ -174,16 +174,15 @@ auto make_device_coo_matrix(raft::resources const& handle,
  * @tparam ColType
  * @tparam NZType
  * @param[in] handle raft handle for managing expensive device resources
- * @param[in] structure_ a sparsity-preserving coordinate structural view
+ * @param[in] structure a sparsity-preserving coordinate structural view
  * @return a sparsity-preserving sparse matrix in coordinate (coo) format
  */
 template <typename ElementType, typename RowType, typename ColType, typename NZType>
 auto make_device_coo_matrix(raft::resources const& handle,
-                            device_coordinate_structure_view<RowType, ColType, NZType> structure_)
+                            device_coordinate_structure_view<RowType, ColType, NZType> structure)
 {
-  return device_sparsity_preserving_coo_matrix<ElementType, RowType, ColType, NZType>(
-    handle,
-    std::make_shared<device_coordinate_structure_view<RowType, ColType, NZType>>(structure_));
+  return device_sparsity_preserving_coo_matrix<ElementType, RowType, ColType, NZType>(handle,
+                                                                                      structure);
 }
 
 /**
@@ -212,16 +211,15 @@ auto make_device_coo_matrix(raft::resources const& handle,
  * @tparam ColType
  * @tparam NZType
  * @param[in] ptr a pointer to array of nonzero matrix elements on device (size nnz)
- * @param[in] structure_ a sparsity-preserving coordinate structural view
+ * @param[in] structure a sparsity-preserving coordinate structural view
  * @return a sparsity-preserving sparse matrix in coordinate (coo) format
  */
 template <typename ElementType, typename RowType, typename ColType, typename NZType>
 auto make_device_coo_matrix_view(
-  ElementType* ptr, device_coordinate_structure_view<RowType, ColType, NZType> structure_)
+  ElementType* ptr, device_coordinate_structure_view<RowType, ColType, NZType> structure)
 {
   return device_coo_matrix_view<ElementType, RowType, ColType, NZType>(
-    raft::device_span<ElementType>(ptr, structure_.get_nnz()),
-    std::make_shared<device_coordinate_structure_view<RowType, ColType, NZType>>(structure_));
+    raft::device_span<ElementType>(ptr, structure.get_nnz()), structure);
 }
 
 /**
@@ -251,19 +249,17 @@ auto make_device_coo_matrix_view(
  * @tparam ColType
  * @tparam NZType
  * @param[in] elements a device span containing nonzero matrix elements (size nnz)
- * @param[in] structure_ a sparsity-preserving coordinate structural view
+ * @param[in] structure a sparsity-preserving coordinate structural view
  * @return
  */
 template <typename ElementType, typename RowType, typename ColType, typename NZType>
 auto make_device_coo_matrix_view(
   raft::device_span<ElementType> elements,
-  device_coordinate_structure_view<RowType, ColType, NZType> structure_)
+  device_coordinate_structure_view<RowType, ColType, NZType> structure)
 {
-  RAFT_EXPECTS(elements.size() == structure_.get_nnz(),
+  RAFT_EXPECTS(elements.size() == structure.get_nnz(),
                "Size of elements must be equal to the nnz from the structure");
-  return device_coo_matrix_view<ElementType, RowType, ColType, NZType>(
-    elements,
-    std::make_shared<device_coordinate_structure_view<RowType, ColType, NZType>>(structure_));
+  return device_coo_matrix_view<ElementType, RowType, ColType, NZType>(elements, structure);
 }
 
 /**
@@ -338,7 +334,7 @@ auto make_device_coordinate_structure(raft::resources const& handle,
  * @return a sparsity-preserving coordinate structural view
  */
 template <typename RowType, typename ColType, typename NZType>
-auto make_device_coo_structure_view(
+auto make_device_coordinate_structure_view(
   RowType* rows, ColType* cols, RowType n_rows, ColType n_cols, NZType nnz)
 {
   return device_coordinate_structure_view<RowType, ColType, NZType>(
@@ -376,10 +372,10 @@ auto make_device_coo_structure_view(
  * @return a sparsity-preserving coordinate structural view
  */
 template <typename RowType, typename ColType, typename NZType>
-auto make_device_coo_structure_view(raft::device_span<RowType> rows,
-                                    raft::device_span<ColType> cols,
-                                    RowType n_rows,
-                                    ColType n_cols)
+auto make_device_coordinate_structure_view(raft::device_span<RowType> rows,
+                                           raft::device_span<ColType> cols,
+                                           RowType n_rows,
+                                           ColType n_cols)
 {
   return device_coordinate_structure_view<RowType, ColType, NZType>(rows, cols, n_rows, n_cols);
 }
diff --git a/cpp/include/raft/core/device_csr_matrix.hpp b/cpp/include/raft/core/device_csr_matrix.hpp
index 59cabacf6d..e4ec15f9bd 100644
--- a/cpp/include/raft/core/device_csr_matrix.hpp
+++ b/cpp/include/raft/core/device_csr_matrix.hpp
@@ -189,7 +189,7 @@ auto make_device_csr_matrix(raft::device_resources const& handle,
  * @tparam IndicesType
  * @tparam NZType
  * @param[in] handle raft handle for managing expensive device resources
- * @param[in] structure_ a sparsity-preserving compressed structural view
+ * @param[in] structure a sparsity-preserving compressed structural view
  * @return a sparsity-preserving sparse matrix in compressed (csr) format
  */
 template <typename ElementType,
@@ -198,12 +198,10 @@ template <typename ElementType,
           typename NZType = uint64_t>
 auto make_device_csr_matrix(
   raft::device_resources const& handle,
-  device_compressed_structure_view<IndptrType, IndicesType, NZType> structure_)
+  device_compressed_structure_view<IndptrType, IndicesType, NZType> structure)
 {
   return device_sparsity_preserving_csr_matrix<ElementType, IndptrType, IndicesType, NZType>(
-    handle,
-    std::make_shared<device_compressed_structure_view<IndptrType, IndicesType, NZType>>(
-      structure_));
+    handle, structure);
 }
 
 /**
@@ -232,7 +230,7 @@ auto make_device_csr_matrix(
  * @tparam IndicesType
  * @tparam NZType
  * @param[in] ptr a pointer to array of nonzero matrix elements on device (size nnz)
- * @param[in] structure_ a sparsity-preserving compressed sparse structural view
+ * @param[in] structure a sparsity-preserving compressed sparse structural view
  * @return a sparsity-preserving csr matrix view
  */
 template <typename ElementType,
@@ -240,10 +238,10 @@ template <typename ElementType,
           typename IndicesType,
           typename NZType = uint64_t>
 auto make_device_csr_matrix_view(
-  ElementType* ptr, device_compressed_structure_view<IndptrType, IndicesType, NZType> structure_)
+  ElementType* ptr, device_compressed_structure_view<IndptrType, IndicesType, NZType> structure)
 {
   return device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType>(
-    raft::device_span<ElementType>(ptr, structure_.get_nnz()), std::make_shared(structure_));
+    raft::device_span<ElementType>(ptr, structure.get_nnz()), structure);
 }
 
 /**
@@ -273,7 +271,7 @@ auto make_device_csr_matrix_view(
  * @tparam IndicesType
  * @tparam NZType
  * @param[in] elements device span containing array of matrix elements (size nnz)
- * @param[in] structure_ a sparsity-preserving structural view
+ * @param[in] structure a sparsity-preserving structural view
  * @return a sparsity-preserving csr matrix view
  */
 template <typename ElementType,
@@ -282,12 +280,11 @@ template <typename ElementType,
           typename NZType = uint64_t>
 auto make_device_csr_matrix_view(
   raft::device_span<ElementType> elements,
-  device_compressed_structure_view<IndptrType, IndicesType, NZType> structure_)
+  device_compressed_structure_view<IndptrType, IndicesType, NZType> structure)
 {
-  RAFT_EXPECTS(elements.size() == structure_.get_nnz(),
+  RAFT_EXPECTS(elements.size() == structure.get_nnz(),
                "Size of elements must be equal to the nnz from the structure");
-  return device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType>(
-    elements, std::make_shared(structure_));
+  return device_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType>(elements, structure);
 }
 
 /**
@@ -365,7 +362,7 @@ auto make_device_compressed_structure(raft::device_resources const& handle,
  * @return a sparsity-preserving compressed structural view
  */
 template <typename IndptrType, typename IndicesType, typename NZType = uint64_t>
-auto make_device_csr_structure_view(
+auto make_device_compressed_structure_view(
   IndptrType* indptr, IndicesType* indices, IndptrType n_rows, IndicesType n_cols, NZType nnz)
 {
   return device_compressed_structure_view<IndptrType, IndicesType, NZType>(
@@ -408,9 +405,9 @@ auto make_device_csr_structure_view(
  *
  */
 template <typename IndptrType, typename IndicesType, typename NZType = uint64_t>
-auto make_device_csr_structure_view(raft::device_span<IndptrType> indptr,
-                                    raft::device_span<IndicesType> indices,
-                                    IndicesType n_cols)
+auto make_device_compressed_structure_view(raft::device_span<IndptrType> indptr,
+                                           raft::device_span<IndicesType> indices,
+                                           IndicesType n_cols)
 {
   return device_compressed_structure_view<IndptrType, IndicesType, NZType>(indptr, indices, n_cols);
 }
diff --git a/cpp/include/raft/core/host_coo_matrix.hpp b/cpp/include/raft/core/host_coo_matrix.hpp
index 45ec278a7d..8fabf5aa95 100644
--- a/cpp/include/raft/core/host_coo_matrix.hpp
+++ b/cpp/include/raft/core/host_coo_matrix.hpp
@@ -173,15 +173,15 @@ auto make_host_coo_matrix(raft::resources const& handle,
  * @tparam ColType
  * @tparam NZType
  * @param[in] handle raft handle for managing expensive resources
- * @param[in] structure_ a sparsity-preserving coordinate structural view
+ * @param[in] structure a sparsity-preserving coordinate structural view
  * @return a sparsity-preserving sparse matrix in coordinate (coo) format
  */
 template <typename ElementType, typename RowType, typename ColType, typename NZType>
 auto make_host_coo_matrix(raft::resources const& handle,
-                          host_coordinate_structure_view<RowType, ColType, NZType> structure_)
+                          host_coordinate_structure_view<RowType, ColType, NZType> structure)
 {
-  return host_sparsity_preserving_coo_matrix<ElementType, RowType, ColType, NZType>(
-    handle, std::make_shared<host_coordinate_structure_view<RowType, ColType, NZType>>(structure_));
+  return host_sparsity_preserving_coo_matrix<ElementType, RowType, ColType, NZType>(handle,
+                                                                                    structure);
 }
 
 /**
@@ -210,15 +210,15 @@ auto make_host_coo_matrix(raft::resources const& handle,
  * @tparam ColType
  * @tparam NZType
  * @param[in] ptr a pointer to array of nonzero matrix elements on host (size nnz)
- * @param[in] structure_ a sparsity-preserving coordinate structural view
+ * @param[in] structure a sparsity-preserving coordinate structural view
  * @return a sparsity-preserving sparse matrix in coordinate (coo) format
  */
 template <typename ElementType, typename RowType, typename ColType, typename NZType>
 auto make_host_coo_matrix_view(ElementType* ptr,
-                               host_coordinate_structure_view<RowType, ColType, NZType> structure_)
+                               host_coordinate_structure_view<RowType, ColType, NZType> structure)
 {
   return host_coo_matrix_view<ElementType, RowType, ColType, NZType>(
-    raft::host_span<ElementType>(ptr, structure_.get_nnz()), std::make_shared(structure_));
+    raft::host_span<ElementType>(ptr, structure.get_nnz()), structure);
 }
 
 /**
@@ -248,17 +248,16 @@ auto make_host_coo_matrix_view(ElementType* ptr,
  * @tparam ColType
  * @tparam NZType
  * @param[in] elements a host span containing nonzero matrix elements (size nnz)
- * @param[in] structure_ a sparsity-preserving coordinate structural view
+ * @param[in] structure a sparsity-preserving coordinate structural view
  * @return
  */
 template <typename ElementType, typename RowType, typename ColType, typename NZType>
 auto make_host_coo_matrix_view(raft::host_span<ElementType> elements,
-                               host_coordinate_structure_view<RowType, ColType, NZType> structure_)
+                               host_coordinate_structure_view<RowType, ColType, NZType> structure)
 {
-  RAFT_EXPECTS(elements.size() == structure_.get_nnz(),
+  RAFT_EXPECTS(elements.size() == structure.get_nnz(),
                "Size of elements must be equal to the nnz from the structure");
-  return host_coo_matrix_view<ElementType, RowType, ColType, NZType>(elements,
-                                                                     std::make_shared(structure_));
+  return host_coo_matrix_view<ElementType, RowType, ColType, NZType>(elements, structure);
 }
 
 /**
@@ -333,7 +332,7 @@ auto make_host_coordinate_structure(raft::resources const& handle,
  * @return a sparsity-preserving coordinate structural view
  */
 template <typename RowType, typename ColType, typename NZType>
-auto make_host_coo_structure_view(
+auto make_host_coordinate_structure_view(
   RowType* rows, ColType* cols, RowType n_rows, ColType n_cols, NZType nnz)
 {
   return host_coordinate_structure_view<RowType, ColType, NZType>(
@@ -371,10 +370,10 @@ auto make_host_coo_structure_view(
  * @return a sparsity-preserving coordinate structural view
  */
 template <typename RowType, typename ColType, typename NZType>
-auto make_host_coo_structure_view(raft::host_span<RowType> rows,
-                                  raft::host_span<ColType> cols,
-                                  RowType n_rows,
-                                  ColType n_cols)
+auto make_host_coordinate_structure_view(raft::host_span<RowType> rows,
+                                         raft::host_span<ColType> cols,
+                                         RowType n_rows,
+                                         ColType n_cols)
 {
   return host_coordinate_structure_view<RowType, ColType, NZType>(rows, cols, n_rows, n_cols);
 }
diff --git a/cpp/include/raft/core/host_csr_matrix.hpp b/cpp/include/raft/core/host_csr_matrix.hpp
index 437f60814e..c64bcdcea6 100644
--- a/cpp/include/raft/core/host_csr_matrix.hpp
+++ b/cpp/include/raft/core/host_csr_matrix.hpp
@@ -189,20 +189,18 @@ auto make_host_csr_matrix(raft::resources const& handle,
  * @tparam IndicesType
  * @tparam NZType
  * @param[in] handle raft handle for managing expensive resources
- * @param[in] structure_ a sparsity-preserving compressed structural view
+ * @param[in] structure a sparsity-preserving compressed structural view
  * @return a sparsity-preserving sparse matrix in compressed (csr) format
  */
 template <typename ElementType,
           typename IndptrType,
           typename IndicesType,
           typename NZType = uint64_t>
-auto make_host_csr_matrix(
-  raft::resources const& handle,
-  host_compressed_structure_view<IndptrType, IndicesType, NZType> structure_)
+auto make_host_csr_matrix(raft::resources const& handle,
+                          host_compressed_structure_view<IndptrType, IndicesType, NZType> structure)
 {
   return host_sparsity_preserving_csr_matrix<ElementType, IndptrType, IndicesType, NZType>(
-    handle,
-    std::make_shared<host_compressed_structure_view<IndptrType, IndicesType, NZType>>(structure_));
+    handle, structure);
 }
 
 /**
@@ -231,7 +229,7 @@ auto make_host_csr_matrix(
  * @tparam IndicesType
  * @tparam NZType
  * @param[in] ptr a pointer to array of nonzero matrix elements on host (size nnz)
- * @param[in] structure_ a sparsity-preserving compressed sparse structural view
+ * @param[in] structure a sparsity-preserving compressed sparse structural view
  * @return a sparsity-preserving csr matrix view
  */
 template <typename ElementType,
@@ -239,10 +237,10 @@ template <typename ElementType,
           typename IndicesType,
           typename NZType = uint64_t>
 auto make_host_csr_matrix_view(
-  ElementType* ptr, host_compressed_structure_view<IndptrType, IndicesType, NZType> structure_)
+  ElementType* ptr, host_compressed_structure_view<IndptrType, IndicesType, NZType> structure)
 {
   return host_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType>(
-    raft::host_span<ElementType>(ptr, structure_.get_nnz()), std::make_shared(structure_));
+    raft::host_span<ElementType>(ptr, structure.get_nnz()), structure);
 }
 
 /**
@@ -272,7 +270,7 @@ auto make_host_csr_matrix_view(
  * @tparam IndicesType
  * @tparam NZType
  * @param[in] elements host span containing array of matrix elements (size nnz)
- * @param[in] structure_ a sparsity-preserving structural view
+ * @param[in] structure a sparsity-preserving structural view
  * @return a sparsity-preserving csr matrix view
  */
 template <typename ElementType,
@@ -281,12 +279,11 @@ template <typename ElementType,
           typename NZType = uint64_t>
 auto make_host_csr_matrix_view(
   raft::host_span<ElementType> elements,
-  host_compressed_structure_view<IndptrType, IndicesType, NZType> structure_)
+  host_compressed_structure_view<IndptrType, IndicesType, NZType> structure)
 {
-  RAFT_EXPECTS(elements.size() == structure_.get_nnz(),
+  RAFT_EXPECTS(elements.size() == structure.get_nnz(),
                "Size of elements must be equal to the nnz from the structure");
-  return host_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType>(
-    elements, std::make_shared(structure_));
+  return host_csr_matrix_view<ElementType, IndptrType, IndicesType, NZType>(elements, structure);
 }
 
 /**
@@ -365,7 +362,7 @@ auto make_host_compressed_structure(raft::resources const& handle,
  * @return a sparsity-preserving compressed structural view
  */
 template <typename IndptrType, typename IndicesType, typename NZType = uint64_t>
-auto make_host_csr_structure_view(
+auto make_host_compressed_structure_view(
   IndptrType* indptr, IndicesType* indices, IndptrType n_rows, IndicesType n_cols, NZType nnz)
 {
   return host_compressed_structure_view<IndptrType, IndicesType, NZType>(
@@ -408,9 +405,9 @@ auto make_host_csr_structure_view(
  *
  */
 template <typename IndptrType, typename IndicesType, typename NZType = uint64_t>
-auto make_host_csr_structure_view(raft::host_span<IndptrType> indptr,
-                                  raft::host_span<IndicesType> indices,
-                                  IndicesType n_cols)
+auto make_host_compressed_structure_view(raft::host_span<IndptrType> indptr,
+                                         raft::host_span<IndicesType> indices,
+                                         IndicesType n_cols)
 {
   return host_compressed_structure_view<IndptrType, IndicesType, NZType>(indptr, indices, n_cols);
 }
diff --git a/cpp/include/raft/core/sparse_types.hpp b/cpp/include/raft/core/sparse_types.hpp
index 207cc944d2..a14944ed5b 100644
--- a/cpp/include/raft/core/sparse_types.hpp
+++ b/cpp/include/raft/core/sparse_types.hpp
@@ -109,7 +109,7 @@ class sparse_matrix_view {
    * Return a view of the structure underlying this matrix
    * @return
    */
-  structure_view_type get_structure() { return structure_view_; }
+  structure_view_type structure_view() { return structure_view_; }
 
   /**
    * Return a span of the nonzero elements of the matrix
@@ -158,18 +158,19 @@ class sparse_matrix {
   using container_policy_type = ContainerPolicy<element_type>;
   using container_type        = typename container_policy_type::container_type;
 
+  // constructor that owns the data and the structure
   sparse_matrix(raft::resources const& handle,
                 row_type n_rows,
                 col_type n_cols,
                 nnz_type nnz = 0) noexcept(std::is_nothrow_default_constructible_v<container_type>)
-    : structure_{std::make_shared<structure_type>(handle, n_rows, n_cols, nnz)},
-      cp_{},
-      c_elements_{cp_.create(handle, 0)} {};
+    : structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, 0)} {};
 
   // Constructor that owns the data but not the structure
-  sparse_matrix(raft::resources const& handle, std::shared_ptr<structure_type> structure) noexcept(
+  // This constructor is only callable with a `structure_type == *_structure_view`
+  // which makes it okay to copy
+  sparse_matrix(raft::resources const& handle, structure_type structure) noexcept(
     std::is_nothrow_default_constructible_v<container_type>)
-    : structure_{structure}, cp_{}, c_elements_{cp_.create(handle, structure.get()->get_nnz())} {};
+    : structure_{structure}, cp_{}, c_elements_{cp_.create(handle, structure_.get_nnz())} {};
 
   constexpr sparse_matrix(sparse_matrix const&) noexcept(
     std::is_nothrow_copy_constructible_v<container_type>) = default;
@@ -187,7 +188,7 @@ class sparse_matrix {
 
   raft::span<ElementType, is_device> get_elements()
   {
-    return raft::span<ElementType, is_device>(c_elements_.data(), structure_view().get_nnz());
+    return raft::span<ElementType, is_device>(c_elements_.data(), structure_.get_nnz());
   }
 
   /**
@@ -209,7 +210,7 @@ class sparse_matrix {
   }
 
  protected:
-  std::shared_ptr<structure_type> structure_;
+  structure_type structure_;
   container_policy_type cp_;
   container_type c_elements_;
 };

From 515ee5fa43d55c0cc14ed166d955db72c7e10f36 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Wed, 19 Apr 2023 18:44:55 -0400
Subject: [PATCH 22/78] Add missing resource factory virtual destructor (#1433)

Closes #1425

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/raft/pull/1433
---
 cpp/include/raft/core/resource/resource_types.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp
index cf302e25f9..8e331293bf 100644
--- a/cpp/include/raft/core/resource/resource_types.hpp
+++ b/cpp/include/raft/core/resource/resource_types.hpp
@@ -83,6 +83,8 @@ class resource_factory {
    * @return resource instance
    */
   virtual resource* make_resource() = 0;
+
+  virtual ~resource_factory() {}
 };
 
 /**

From bc732c09c86ec4e25fe16e2419c6fd7123f0be22 Mon Sep 17 00:00:00 2001
From: Jordan Jacobelli <jjacobelli@nvidia.com>
Date: Thu, 20 Apr 2023 21:50:19 +0200
Subject: [PATCH 23/78] Remove usage of rapids-get-rapids-version-from-git
 (#1436)

Instead of using `rapids-get-rapids-version-from-git` we can just hardcode the version and use `update-version.sh` to update it

Authors:
  - Jordan Jacobelli (https://github.com/jjacobelli)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1436
---
 ci/build_docs.sh             | 2 +-
 ci/release/update-version.sh | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 5db6fa11be..e52beb22ea 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -19,7 +19,7 @@ rapids-print-env
 rapids-logger "Downloading artifacts from previous jobs"
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
-VERSION_NUMBER=$(rapids-get-rapids-version-from-git)
+VERSION_NUMBER="23.06"
 
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index d8c22b4931..f6c6b08644 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -80,6 +80,7 @@ sed_runner "s/ucx-py.*\",/ucx-py==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\",/g" python
 for FILE in .github/workflows/*.yaml; do
   sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
 done
+sed_runner "s/VERSION_NUMBER=\".*/VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
 
 sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxyfile
 

From 0ac32e181f32302537c5056e215cd5d99635a742 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Thu, 20 Apr 2023 17:36:42 -0400
Subject: [PATCH 24/78] The glog project root CMakeLists.txt is where we should
 build from (#1442)

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1442
---
 cpp/cmake/thirdparty/get_glog.cmake | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/cmake/thirdparty/get_glog.cmake b/cpp/cmake/thirdparty/get_glog.cmake
index 9334224de5..35a9170f99 100644
--- a/cpp/cmake/thirdparty/get_glog.cmake
+++ b/cpp/cmake/thirdparty/get_glog.cmake
@@ -26,7 +26,6 @@ function(find_and_configure_glog)
             CPM_ARGS
             GIT_REPOSITORY         https://github.com/${PKG_FORK}/glog.git
             GIT_TAG                ${PKG_PINNED_TAG}
-            SOURCE_SUBDIR          cpp
             EXCLUDE_FROM_ALL       ${PKG_EXCLUDE_FROM_ALL}
             )
 
@@ -46,4 +45,4 @@ find_and_configure_glog(VERSION 0.6.0
         FORK             google
         PINNED_TAG       v0.6.0
         EXCLUDE_FROM_ALL ON
-        )
\ No newline at end of file
+        )

From c0c4d52c5a72c494e070f42324fc80b3a7cda205 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Sun, 23 Apr 2023 16:12:39 -0700
Subject: [PATCH 25/78] fix ivf_pq n_probes (#1456)

The ivf-pq search code was including a guard like

```auto n_probes = std::min<uint32_t>(params.n_probes, index.n_lists());```

to make sure that we weren't selecting more values than are available. However, the
clamped `n_probes` wasn't being used; instead the raw `params.n_probes` was being passed to
functions like `select_k`. This led to asking `select_k` to select, say, 100 items when there
were only 90 to choose from, and caused some issues downstream when trying to update
the `select_k` algorithm.

Fix.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Micka (https://github.com/lowener)
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1456
---
 cpp/include/raft/neighbors/detail/ivf_pq_search.cuh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
index 4b6e6f5e31..9a94458748 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
@@ -1613,7 +1613,7 @@ inline void search(raft::device_resources const& handle,
 
   rmm::device_uvector<float> float_queries(max_queries * dim_ext, stream, mr);
   rmm::device_uvector<float> rot_queries(max_queries * index.rot_dim(), stream, mr);
-  rmm::device_uvector<uint32_t> clusters_to_probe(max_queries * params.n_probes, stream, mr);
+  rmm::device_uvector<uint32_t> clusters_to_probe(max_queries * n_probes, stream, mr);
 
   auto search_instance = ivfpq_search<IdxT>::fun(params, index.metric());
 
@@ -1624,7 +1624,7 @@ inline void search(raft::device_resources const& handle,
                     clusters_to_probe.data(),
                     float_queries.data(),
                     queries_batch,
-                    params.n_probes,
+                    n_probes,
                     index.n_lists(),
                     dim,
                     dim_ext,
@@ -1661,10 +1661,10 @@ inline void search(raft::device_resources const& handle,
       search_instance(handle,
                       index,
                       max_samples,
-                      params.n_probes,
+                      n_probes,
                       k,
                       batch_size,
-                      clusters_to_probe.data() + uint64_t(params.n_probes) * offset_b,
+                      clusters_to_probe.data() + uint64_t(n_probes) * offset_b,
                       rot_queries.data() + uint64_t(index.rot_dim()) * offset_b,
                       neighbors + uint64_t(k) * (offset_q + offset_b),
                       distances + uint64_t(k) * (offset_q + offset_b),

From 83c326ec42fbe64ad5149a1a12a3c754a88c5c71 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 24 Apr 2023 18:55:00 -0700
Subject: [PATCH 26/78] Update clang-format to 16.0.1. (#1412)

This PR updates the clang-format version used by pre-commit.

Authors:
  - Bradley Dice (https://github.com/bdice)
  - Jordan Jacobelli (https://github.com/jjacobelli)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)
  - Corey J. Nolet (https://github.com/cjnolet)
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/raft/pull/1412
---
 .pre-commit-config.yaml                       |   2 +-
 .../all_cuda-118_arch-x86_64.yaml             |   4 +-
 .../bench_ann_cuda-118_arch-x86_64.yaml       |   4 +-
 cpp/bench/ann/src/common/dataset.h            |   4 +-
 cpp/bench/prims/matrix/select_k.cu            |  41 +++--
 cpp/bench/prims/neighbors/knn.cuh             |   9 +-
 .../raft/cluster/detail/kmeans_balanced.cuh   |   2 +-
 .../raft/cluster/single_linkage_types.hpp     |   8 +-
 cpp/include/raft/common/cub_wrappers.cuh      |   8 +-
 .../raft/common/device_loads_stores.cuh       |   8 +-
 cpp/include/raft/common/scatter.cuh           |   8 +-
 cpp/include/raft/common/seive.hpp             |   8 +-
 cpp/include/raft/core/csr_matrix.hpp          |   2 +-
 cpp/include/raft/core/cublas_macros.hpp       |   4 +-
 cpp/include/raft/core/cusolver_macros.hpp     |   4 +-
 cpp/include/raft/core/cusparse_macros.hpp     |   4 +-
 cpp/include/raft/core/detail/logger.hpp       |   8 +-
 .../core/detail/mdspan_numpy_serializer.hpp   |   8 +-
 cpp/include/raft/core/detail/nvtx.hpp         |   4 +-
 cpp/include/raft/core/detail/span.hpp         |  23 +--
 cpp/include/raft/core/device_coo_matrix.hpp   |   6 +-
 cpp/include/raft/core/device_csr_matrix.hpp   |   6 +-
 cpp/include/raft/core/device_mdspan.hpp       |  12 +-
 cpp/include/raft/core/device_resources.hpp    |   4 +-
 cpp/include/raft/core/handle.hpp              |   2 +-
 cpp/include/raft/core/host_coo_matrix.hpp     |   6 +-
 cpp/include/raft/core/host_csr_matrix.hpp     |   6 +-
 cpp/include/raft/core/host_mdspan.hpp         |   6 +-
 cpp/include/raft/core/interruptible.hpp       |   6 +-
 cpp/include/raft/core/kvp.hpp                 |   4 +-
 cpp/include/raft/core/mdarray.hpp             |  19 +--
 cpp/include/raft/core/mdspan.hpp              |  18 +--
 cpp/include/raft/core/nvtx.hpp                |   8 +-
 .../raft/core/resource/resource_types.hpp     |   2 +-
 cpp/include/raft/core/resources.hpp           |   2 +-
 cpp/include/raft/core/span.hpp                |   4 +-
 .../raft/core/temporary_device_buffer.hpp     |   4 +-
 .../distance/detail/distance_ops/cutlass.cuh  |   6 +-
 .../distance/detail/masked_distance_base.cuh  |   2 +-
 .../detail/pairwise_distance_base.cuh         |   2 +-
 .../detail/predicated_tile_iterator_normvec.h |   6 +-
 cpp/include/raft/lap/lap.cuh                  |   8 +-
 cpp/include/raft/lap/lap.hpp                  |   8 +-
 cpp/include/raft/linalg/add.cuh               |   4 +-
 cpp/include/raft/linalg/binary_op.cuh         |   4 +-
 .../raft/linalg/coalesced_reduction.cuh       |   4 +-
 cpp/include/raft/linalg/contractions.cuh      |  13 +-
 .../raft/linalg/detail/cublas_wrappers.hpp    |  10 +-
 .../raft/linalg/detail/map_then_reduce.cuh    |   3 +-
 cpp/include/raft/linalg/divide.cuh            |   4 +-
 cpp/include/raft/linalg/eig.cuh               |   4 +-
 cpp/include/raft/linalg/gemv.cuh              |   4 +-
 cpp/include/raft/linalg/lanczos.cuh           |   8 +-
 cpp/include/raft/linalg/lstsq.cuh             |   4 +-
 cpp/include/raft/linalg/matrix_vector_op.cuh  |   4 +-
 .../raft/linalg/mean_squared_error.cuh        |   4 +-
 cpp/include/raft/linalg/multiply.cuh          |   4 +-
 cpp/include/raft/linalg/power.cuh             |   4 +-
 cpp/include/raft/linalg/reduce.cuh            |   4 +-
 .../raft/linalg/reduce_cols_by_key.cuh        |   4 +-
 .../raft/linalg/reduce_rows_by_key.cuh        |   4 +-
 cpp/include/raft/linalg/rsvd.cuh              |   4 +-
 cpp/include/raft/linalg/sqrt.cuh              |   4 +-
 cpp/include/raft/linalg/strided_reduction.cuh |   4 +-
 cpp/include/raft/linalg/subtract.cuh          |   4 +-
 cpp/include/raft/linalg/svd.cuh               |   4 +-
 cpp/include/raft/linalg/ternary_op.cuh        |   4 +-
 cpp/include/raft/linalg/transpose.cuh         |   4 +-
 cpp/include/raft/linalg/unary_op.cuh          |   4 +-
 cpp/include/raft/matrix/col_wise_sort.cuh     |   2 +-
 .../raft/matrix/detail/select_warpsort.cuh    |   5 +-
 cpp/include/raft/matrix/math.cuh              |   6 +-
 cpp/include/raft/matrix/matrix.cuh            |   6 +-
 cpp/include/raft/matrix/matrix.hpp            |   8 +-
 cpp/include/raft/neighbors/ann_types.hpp      |  10 +-
 cpp/include/raft/neighbors/cagra_types.hpp    |   8 +-
 .../detail/cagra/compute_distance.hpp         |   2 +-
 .../raft/neighbors/detail/cagra/fragment.hpp  |   3 +-
 .../neighbors/detail/cagra/graph_core.cuh     |  16 +-
 .../detail/cagra/search_multi_cta.cuh         |  14 +-
 .../detail/cagra/search_multi_kernel.cuh      |  36 ++---
 .../neighbors/detail/cagra/search_plan.cuh    |   2 +-
 .../detail/cagra/search_single_cta.cuh        |  21 ++-
 .../detail/cagra/topk_for_cagra/topk_core.cuh |  14 +-
 .../detail/faiss_select/MergeNetworkBlock.cuh |   3 +-
 .../detail/faiss_select/MergeNetworkWarp.cuh  |   3 +-
 .../neighbors/detail/faiss_select/Select.cuh  |   3 +-
 .../raft/neighbors/detail/ivf_pq_build.cuh    |   2 +-
 .../raft/neighbors/detail/ivf_pq_search.cuh   |   2 +-
 cpp/include/raft/neighbors/ivf_flat_types.hpp |   8 +-
 cpp/include/raft/neighbors/ivf_list_types.hpp |   8 +-
 cpp/include/raft/neighbors/ivf_pq_types.hpp   |   8 +-
 cpp/include/raft/random/permute.cuh           |   3 +-
 .../random/sample_without_replacement.cuh     |   3 +-
 .../raft/sparse/detail/cusparse_wrappers.h    |   6 +-
 cpp/include/raft/sparse/hierarchy/common.h    |   8 +-
 .../raft/sparse/hierarchy/single_linkage.cuh  |   8 +-
 .../raft/sparse/linalg/detail/norm.cuh        |  22 +--
 .../raft/sparse/linalg/detail/spectral.cuh    |   2 +-
 cpp/include/raft/sparse/linalg/norm.cuh       |  10 +-
 cpp/include/raft/sparse/mst/mst.cuh           |   8 +-
 cpp/include/raft/sparse/mst/mst.hpp           |   8 +-
 cpp/include/raft/sparse/mst/mst_solver.cuh    |   8 +-
 .../raft/sparse/neighbors/detail/knn.cuh      |   2 +-
 cpp/include/raft/sparse/neighbors/knn.cuh     |   6 +-
 .../sparse/selection/connect_components.cuh   |   8 +-
 cpp/include/raft/sparse/selection/knn.cuh     |   8 +-
 .../raft/sparse/selection/knn_graph.cuh       |   8 +-
 cpp/include/raft/sparse/solver/mst_solver.cuh |   8 +-
 cpp/include/raft/spatial/knn/ann_common.h     |  10 +-
 cpp/include/raft/spatial/knn/ann_types.hpp    |   8 +-
 cpp/include/raft/spatial/knn/ball_cover.cuh   |   6 +-
 .../raft/spatial/knn/ball_cover_types.hpp     |   8 +-
 .../raft/spatial/knn/detail/ann_utils.cuh     |   6 +-
 .../raft/spatial/knn/epsilon_neighborhood.cuh |   8 +-
 cpp/include/raft/spatial/knn/ivf_flat.cuh     |   8 +-
 .../raft/spatial/knn/ivf_flat_types.hpp       |   8 +-
 cpp/include/raft/spatial/knn/ivf_pq.cuh       |   8 +-
 cpp/include/raft/spatial/knn/ivf_pq_types.hpp |   8 +-
 cpp/include/raft/spectral/detail/lapack.hpp   |  14 +-
 .../raft/stats/adjusted_rand_index.cuh        |   4 +-
 cpp/include/raft/stats/completeness_score.cuh |   4 +-
 cpp/include/raft/stats/cov.cuh                |   4 +-
 cpp/include/raft/stats/detail/minmax.cuh      |   5 +-
 cpp/include/raft/stats/entropy.cuh            |   4 +-
 cpp/include/raft/stats/histogram.cuh          |   4 +-
 cpp/include/raft/stats/homogeneity_score.cuh  |   4 +-
 cpp/include/raft/stats/kl_divergence.cuh      |   4 +-
 cpp/include/raft/stats/mean.cuh               |   4 +-
 cpp/include/raft/stats/mean_center.cuh        |   4 +-
 cpp/include/raft/stats/meanvar.cuh            |   2 +-
 cpp/include/raft/stats/minmax.cuh             |   4 +-
 cpp/include/raft/stats/mutual_info_score.cuh  |   4 +-
 cpp/include/raft/stats/rand_index.cuh         |   4 +-
 cpp/include/raft/stats/stddev.cuh             |   4 +-
 cpp/include/raft/stats/sum.cuh                |   4 +-
 cpp/include/raft/stats/v_measure.cuh          |   4 +-
 cpp/include/raft/stats/weighted_mean.cuh      |   4 +-
 cpp/include/raft/util/bitonic_sort.cuh        |   6 +-
 cpp/include/raft/util/cache.cuh               |   6 +-
 cpp/include/raft/util/cache_util.cuh          |   6 +-
 cpp/include/raft/util/integer_utils.hpp       |   6 +-
 cpp/include/raft/util/vectorized.cuh          |   5 +-
 cpp/include/raft_runtime/neighbors/refine.hpp |   2 +-
 cpp/scripts/run-clang-format.py               | 143 ------------------
 cpp/test/core/mdarray.cu                      |   2 +-
 cpp/test/core/mdspan_utils.cu                 |   3 +-
 cpp/test/distance/dist_canberra.cu            |   8 +-
 cpp/test/distance/dist_correlation.cu         |   8 +-
 cpp/test/distance/dist_cos.cu                 |   5 +-
 cpp/test/distance/dist_hamming.cu             |   8 +-
 cpp/test/distance/dist_hellinger.cu           |   8 +-
 cpp/test/distance/dist_inner_product.cu       |   6 +-
 cpp/test/distance/dist_jensen_shannon.cu      |   8 +-
 cpp/test/distance/dist_kl_divergence.cu       |   8 +-
 cpp/test/distance/dist_l1.cu                  |   8 +-
 cpp/test/distance/dist_l2_exp.cu              |   3 +-
 cpp/test/distance/dist_l2_sqrt_exp.cu         |   6 +-
 cpp/test/distance/dist_l2_unexp.cu            |   3 +-
 cpp/test/distance/dist_l_inf.cu               |   6 +-
 cpp/test/distance/dist_russell_rao.cu         |   8 +-
 cpp/test/distance/distance_base.cuh           |   4 +-
 cpp/test/linalg/rsvd.cu                       |  42 ++---
 cpp/test/neighbors/ann_cagra.cuh              |   2 +-
 cpp/test/neighbors/knn.cu                     |   4 +-
 cpp/test/sparse/spgemmi.cu                    |  28 ++--
 cpp/test/util/bitonic_sort.cu                 |  22 +--
 dependencies.yaml                             |   4 +-
 docs/source/developer_guide.md                | 107 ++++++++-----
 thirdparty/pcg/pcg_basic.c                    |  93 +++++-------
 170 files changed, 636 insertions(+), 853 deletions(-)
 delete mode 100755 cpp/scripts/run-clang-format.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d6e4ecb676..2a70632497 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -43,7 +43,7 @@ repos:
                 additional_dependencies: [toml]
                 args: ["--config=pyproject.toml"]
       - repo: https://github.com/pre-commit/mirrors-clang-format
-        rev: v11.1.0
+        rev: v16.0.1
         hooks:
               - id: clang-format
                 types_or: [c, c++, cuda]
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 0e06076f1a..d192aefa7c 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -9,8 +9,8 @@ channels:
 dependencies:
 - breathe
 - c-compiler
-- clang-tools=11.1.0
-- clang=11.1.0
+- clang-tools=16.0.1
+- clang=16.0.1
 - cmake>=3.23.1,!=3.25.0
 - cuda-profiler-api=11.8.86
 - cuda-python >=11.7.1,<12.0
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
index 5965aaef8f..2013c16fa4 100644
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -8,8 +8,8 @@ channels:
 - nvidia
 dependencies:
 - c-compiler
-- clang-tools=11.1.0
-- clang=11.1.0
+- clang-tools=16.0.1
+- clang=16.0.1
 - cmake>=3.23.1,!=3.25.0
 - cuda-profiler-api=11.8.86
 - cudatoolkit=11.8
diff --git a/cpp/bench/ann/src/common/dataset.h b/cpp/bench/ann/src/common/dataset.h
index 1244935c99..46dd66d649 100644
--- a/cpp/bench/ann/src/common/dataset.h
+++ b/cpp/bench/ann/src/common/dataset.h
@@ -47,7 +47,7 @@ class BinFile {
           uint32_t subset_first_row = 0,
           uint32_t subset_size      = 0);
   ~BinFile() { fclose(fp_); }
-  BinFile(const BinFile&) = delete;
+  BinFile(const BinFile&)            = delete;
   BinFile& operator=(const BinFile&) = delete;
 
   void get_shape(size_t* nrows, int* ndims)
@@ -219,7 +219,7 @@ class Dataset {
   Dataset(const std::string& name, const std::string& distance) : name_(name), distance_(distance)
   {
   }
-  Dataset(const Dataset&) = delete;
+  Dataset(const Dataset&)            = delete;
   Dataset& operator=(const Dataset&) = delete;
   virtual ~Dataset();
 
diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index 870119db52..1ff584ca58 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -157,34 +157,33 @@ const std::vector<select::params> kInputs{
   {10, 1000000, 256, true, false, true},
 };
 
-#define SELECTION_REGISTER(KeyT, IdxT, A)                          \
-  namespace BENCHMARK_PRIVATE_NAME(selection)                      \
-  {                                                                \
-    using SelectK = selection<KeyT, IdxT, select::Algo::A>;        \
-    RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \
+#define SELECTION_REGISTER(KeyT, IdxT, A)                        \
+  namespace BENCHMARK_PRIVATE_NAME(selection) {                  \
+  using SelectK = selection<KeyT, IdxT, select::Algo::A>;        \
+  RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \
   }
 
-SELECTION_REGISTER(float, uint32_t, kPublicApi);             // NOLINT
-SELECTION_REGISTER(float, uint32_t, kRadix8bits);            // NOLINT
-SELECTION_REGISTER(float, uint32_t, kRadix11bits);           // NOLINT
-SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass);  // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpAuto);              // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpImmediate);         // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpFiltered);          // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpDistributed);       // NOLINT
-SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm);    // NOLINT
+SELECTION_REGISTER(float, uint32_t, kPublicApi);              // NOLINT
+SELECTION_REGISTER(float, uint32_t, kRadix8bits);             // NOLINT
+SELECTION_REGISTER(float, uint32_t, kRadix11bits);            // NOLINT
+SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass);   // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpAuto);               // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpImmediate);          // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpFiltered);           // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpDistributed);        // NOLINT
+SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm);     // NOLINT
 
 SELECTION_REGISTER(double, uint32_t, kRadix8bits);            // NOLINT
 SELECTION_REGISTER(double, uint32_t, kRadix11bits);           // NOLINT
 SELECTION_REGISTER(double, uint32_t, kRadix11bitsExtraPass);  // NOLINT
 SELECTION_REGISTER(double, uint32_t, kWarpAuto);              // NOLINT
 
-SELECTION_REGISTER(double, int64_t, kRadix8bits);            // NOLINT
-SELECTION_REGISTER(double, int64_t, kRadix11bits);           // NOLINT
-SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass);  // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpImmediate);         // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpFiltered);          // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpDistributed);       // NOLINT
-SELECTION_REGISTER(double, int64_t, kWarpDistributedShm);    // NOLINT
+SELECTION_REGISTER(double, int64_t, kRadix8bits);             // NOLINT
+SELECTION_REGISTER(double, int64_t, kRadix11bits);            // NOLINT
+SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass);   // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpImmediate);          // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpFiltered);           // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpDistributed);        // NOLINT
+SELECTION_REGISTER(double, int64_t, kWarpDistributedShm);     // NOLINT
 
 }  // namespace raft::matrix
diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
index 8f0b1cb5d9..5431b9492e 100644
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ b/cpp/bench/prims/neighbors/knn.cuh
@@ -384,11 +384,10 @@ inline const std::vector<TransferStrategy> kNoCopyOnly{TransferStrategy::NO_COPY
 inline const std::vector<Scope> kScopeFull{Scope::BUILD_SEARCH};
 inline const std::vector<Scope> kAllScopes{Scope::BUILD_SEARCH, Scope::SEARCH, Scope::BUILD};
 
-#define KNN_REGISTER(ValT, IdxT, ImplT, inputs, strats, scope)                   \
-  namespace BENCHMARK_PRIVATE_NAME(knn)                                          \
-  {                                                                              \
-    using KNN = knn<ValT, IdxT, ImplT<ValT, IdxT>>;                              \
-    RAFT_BENCH_REGISTER(KNN, #ValT "/" #IdxT "/" #ImplT, inputs, strats, scope); \
+#define KNN_REGISTER(ValT, IdxT, ImplT, inputs, strats, scope)                 \
+  namespace BENCHMARK_PRIVATE_NAME(knn) {                                      \
+  using KNN = knn<ValT, IdxT, ImplT<ValT, IdxT>>;                              \
+  RAFT_BENCH_REGISTER(KNN, #ValT "/" #IdxT "/" #ImplT, inputs, strats, scope); \
   }
 
 }  // namespace raft::bench::spatial
diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
index 3d23c809c3..4f7cae1ad9 100644
--- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
@@ -436,7 +436,7 @@ __global__ void __launch_bounds__((WarpSize * BlockDimY))
   adjust_centers_kernel(MathT* centers,  // [n_clusters, dim]
                         IdxT n_clusters,
                         IdxT dim,
-                        const T* dataset,  // [n_rows, dim]
+                        const T* dataset,               // [n_rows, dim]
                         IdxT n_rows,
                         const LabelT* labels,           // [n_rows]
                         const CounterT* cluster_sizes,  // [n_clusters]
diff --git a/cpp/include/raft/cluster/single_linkage_types.hpp b/cpp/include/raft/cluster/single_linkage_types.hpp
index 9a4fcfef60..cd815622bf 100644
--- a/cpp/include/raft/cluster/single_linkage_types.hpp
+++ b/cpp/include/raft/cluster/single_linkage_types.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -77,9 +77,7 @@ class linkage_output {
   }
 };
 
-class linkage_output_int : public linkage_output<int> {
-};
-class linkage_output_int64 : public linkage_output<int64_t> {
-};
+class linkage_output_int : public linkage_output<int> {};
+class linkage_output_int64 : public linkage_output<int64_t> {};
 
 };  // namespace raft::cluster
diff --git a/cpp/include/raft/common/cub_wrappers.cuh b/cpp/include/raft/common/cub_wrappers.cuh
index e80d7cccd9..dd8fc2d103 100644
--- a/cpp/include/raft/common/cub_wrappers.cuh
+++ b/cpp/include/raft/common/cub_wrappers.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please note that there is no equivalent in RAFT's public API"
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please note that there is no equivalent in RAFT's public API"
                 " so this file will eventually be removed altogether.")
 
 #include <raft/util/detail/cub_wrappers.cuh>
diff --git a/cpp/include/raft/common/device_loads_stores.cuh b/cpp/include/raft/common/device_loads_stores.cuh
index f3cfbd81cc..6c62cd70cc 100644
--- a/cpp/include/raft/common/device_loads_stores.cuh
+++ b/cpp/include/raft/common/device_loads_stores.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,8 +24,8 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/util version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/util version instead.")
 
 #include <raft/util/device_loads_stores.cuh>
diff --git a/cpp/include/raft/common/scatter.cuh b/cpp/include/raft/common/scatter.cuh
index 0e83f9a5cd..72de79a596 100644
--- a/cpp/include/raft/common/scatter.cuh
+++ b/cpp/include/raft/common/scatter.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,8 +24,8 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/matrix version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/matrix version instead.")
 
 #include <raft/util/scatter.cuh>
diff --git a/cpp/include/raft/common/seive.hpp b/cpp/include/raft/common/seive.hpp
index 633c8dd3e1..433b032b0f 100644
--- a/cpp/include/raft/common/seive.hpp
+++ b/cpp/include/raft/common/seive.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,8 +24,8 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/util version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/util version instead.")
 
 #include <raft/util/seive.hpp>
diff --git a/cpp/include/raft/core/csr_matrix.hpp b/cpp/include/raft/core/csr_matrix.hpp
index c37cfa41c8..95d09d3eea 100644
--- a/cpp/include/raft/core/csr_matrix.hpp
+++ b/cpp/include/raft/core/csr_matrix.hpp
@@ -141,7 +141,7 @@ class compressed_structure
   constexpr auto operator=(compressed_structure const&) noexcept(
     std::is_nothrow_copy_assignable<indptr_container_type>::value)
     -> compressed_structure& = default;
-  constexpr auto operator    =(compressed_structure&&) noexcept(
+  constexpr auto operator=(compressed_structure&&) noexcept(
     std::is_nothrow_move_assignable<indptr_container_type>::value)
     -> compressed_structure& = default;
 
diff --git a/cpp/include/raft/core/cublas_macros.hpp b/cpp/include/raft/core/cublas_macros.hpp
index 855c1228f7..5c56240ccf 100644
--- a/cpp/include/raft/core/cublas_macros.hpp
+++ b/cpp/include/raft/core/cublas_macros.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@
 #include <raft/core/error.hpp>
 
 ///@todo: enable this once we have logger enabled
-//#include <cuml/common/logger.hpp>
+// #include <cuml/common/logger.hpp>
 
 #include <cstdint>
 
diff --git a/cpp/include/raft/core/cusolver_macros.hpp b/cpp/include/raft/core/cusolver_macros.hpp
index 8f7caf65f3..4477d32118 100644
--- a/cpp/include/raft/core/cusolver_macros.hpp
+++ b/cpp/include/raft/core/cusolver_macros.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
 #include <cusolverDn.h>
 #include <cusolverSp.h>
 ///@todo: enable this once logging is enabled
-//#include <cuml/common/logger.hpp>
+// #include <cuml/common/logger.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <type_traits>
 
diff --git a/cpp/include/raft/core/cusparse_macros.hpp b/cpp/include/raft/core/cusparse_macros.hpp
index 8a9aab55f7..21a25ae28c 100644
--- a/cpp/include/raft/core/cusparse_macros.hpp
+++ b/cpp/include/raft/core/cusparse_macros.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
 #include <cusparse.h>
 #include <raft/core/error.hpp>
 ///@todo: enable this once logging is enabled
-//#include <cuml/common/logger.hpp>
+// #include <cuml/common/logger.hpp>
 
 #define _CUSPARSE_ERR_TO_STR(err) \
   case err: return #err;
diff --git a/cpp/include/raft/core/detail/logger.hpp b/cpp/include/raft/core/detail/logger.hpp
index 619fb89452..532aee4d90 100644
--- a/cpp/include/raft/core/detail/logger.hpp
+++ b/cpp/include/raft/core/detail/logger.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,8 +15,8 @@
  */
 #pragma once
 
-#pragma message(__FILE__                                                 \
-                " is deprecated and will be removed in future releases." \
-                " Please use the <raft/core/logger.hpp> version instead.")
+#pragma message(__FILE__                                                   \
+                  " is deprecated and will be removed in future releases." \
+                  " Please use the <raft/core/logger.hpp> version instead.")
 
 #include <raft/core/logger.hpp>
diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
index df89811636..d0aea4168e 100644
--- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
+++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
@@ -74,7 +74,7 @@ namespace numpy_serializer {
 
 #if RAFT_SYSTEM_LITTLE_ENDIAN == 1
 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_LITTLE_ENDIAN_CHAR
-#else  // RAFT_SYSTEM_LITTLE_ENDIAN == 1
+#else   // RAFT_SYSTEM_LITTLE_ENDIAN == 1
 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_BIG_ENDIAN_CHAR
 #endif  // RAFT_SYSTEM_LITTLE_ENDIAN == 1
 
@@ -110,11 +110,9 @@ struct header_t {
 };
 
 template <class T>
-struct is_complex : std::false_type {
-};
+struct is_complex : std::false_type {};
 template <class T>
-struct is_complex<std::complex<T>> : std::true_type {
-};
+struct is_complex<std::complex<T>> : std::true_type {};
 
 template <typename T, typename std::enable_if_t<std::is_floating_point_v<T>, bool> = true>
 inline dtype_t get_numpy_dtype()
diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp
index adbf3a3666..ca4c5e4a08 100644
--- a/cpp/include/raft/core/detail/nvtx.hpp
+++ b/cpp/include/raft/core/detail/nvtx.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -191,7 +191,7 @@ inline void pop_range()
   nvtxDomainRangePop(domain_store<Domain>::value());
 }
 
-#else  // NVTX_ENABLED
+#else   // NVTX_ENABLED
 
 template <typename Domain, typename... Args>
 inline void push_range(const char* format, Args... args)
diff --git a/cpp/include/raft/core/detail/span.hpp b/cpp/include/raft/core/detail/span.hpp
index 20500d618b..e6ccb8535c 100644
--- a/cpp/include/raft/core/detail/span.hpp
+++ b/cpp/include/raft/core/detail/span.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,8 +37,7 @@ template <std::size_t Extent, std::size_t Offset, std::size_t Count>
 struct extent_value_t
   : public std::integral_constant<
       std::size_t,
-      Count != dynamic_extent ? Count : (Extent != dynamic_extent ? Extent - Offset : Extent)> {
-};
+      Count != dynamic_extent ? Count : (Extent != dynamic_extent ? Extent - Offset : Extent)> {};
 
 /*!
  * If N is dynamic_extent, the extent of the returned span E is also
@@ -47,31 +46,25 @@ struct extent_value_t
 template <typename T, std::size_t Extent>
 struct extent_as_bytes_value_t
   : public std::integral_constant<std::size_t,
-                                  Extent == dynamic_extent ? Extent : sizeof(T) * Extent> {
-};
+                                  Extent == dynamic_extent ? Extent : sizeof(T) * Extent> {};
 
 template <std::size_t From, std::size_t To>
 struct is_allowed_extent_conversion_t
   : public std::integral_constant<bool,
-                                  From == To || From == dynamic_extent || To == dynamic_extent> {
-};
+                                  From == To || From == dynamic_extent || To == dynamic_extent> {};
 
 template <class From, class To>
 struct is_allowed_element_type_conversion_t
-  : public std::integral_constant<bool, std::is_convertible<From (*)[], To (*)[]>::value> {
-};
+  : public std::integral_constant<bool, std::is_convertible<From (*)[], To (*)[]>::value> {};
 
 template <class T>
-struct is_span_oracle_t : std::false_type {
-};
+struct is_span_oracle_t : std::false_type {};
 
 template <class T, bool is_device, std::size_t Extent>
-struct is_span_oracle_t<span<T, is_device, Extent>> : std::true_type {
-};
+struct is_span_oracle_t<span<T, is_device, Extent>> : std::true_type {};
 
 template <class T>
-struct is_span_t : public is_span_oracle_t<typename std::remove_cv<T>::type> {
-};
+struct is_span_t : public is_span_oracle_t<typename std::remove_cv<T>::type> {};
 
 template <class InputIt1, class InputIt2, class Compare>
 _RAFT_HOST_DEVICE constexpr auto lexicographical_compare(InputIt1 first1,
diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp
index 35be67431d..ce016dd5e0 100644
--- a/cpp/include/raft/core/device_coo_matrix.hpp
+++ b/cpp/include/raft/core/device_coo_matrix.hpp
@@ -79,8 +79,7 @@ template <typename RowType, typename ColType, typename NZType>
 using device_coordinate_structure_view = coordinate_structure_view<RowType, ColType, NZType, true>;
 
 template <typename T>
-struct is_device_coo_matrix : std::false_type {
-};
+struct is_device_coo_matrix : std::false_type {};
 
 template <typename ElementType,
           typename RowType,
@@ -91,8 +90,7 @@ template <typename ElementType,
           SparsityType sparsity_type>
 struct is_device_coo_matrix<
   device_coo_matrix<ElementType, RowType, ColType, NZType, ContainerPolicy, sparsity_type>>
-  : std::true_type {
-};
+  : std::true_type {};
 
 template <typename T>
 constexpr bool is_device_coo_matrix_v = is_device_coo_matrix<T>::value;
diff --git a/cpp/include/raft/core/device_csr_matrix.hpp b/cpp/include/raft/core/device_csr_matrix.hpp
index e4ec15f9bd..869034e925 100644
--- a/cpp/include/raft/core/device_csr_matrix.hpp
+++ b/cpp/include/raft/core/device_csr_matrix.hpp
@@ -46,8 +46,7 @@ using device_sparsity_owning_csr_matrix =
   csr_matrix<ElementType, IndptrType, IndicesType, NZType, true, ContainerPolicy>;
 
 template <typename T>
-struct is_device_csr_matrix : std::false_type {
-};
+struct is_device_csr_matrix : std::false_type {};
 
 template <typename ElementType,
           typename IndptrType,
@@ -58,8 +57,7 @@ template <typename ElementType,
           SparsityType sparsity_type>
 struct is_device_csr_matrix<
   device_csr_matrix<ElementType, IndptrType, IndicesType, NZType, ContainerPolicy, sparsity_type>>
-  : std::true_type {
-};
+  : std::true_type {};
 
 template <typename T>
 constexpr bool is_device_csr_matrix_v = is_device_csr_matrix<T>::value;
diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp
index f72ae36d64..7510c388fe 100644
--- a/cpp/include/raft/core/device_mdspan.hpp
+++ b/cpp/include/raft/core/device_mdspan.hpp
@@ -45,11 +45,9 @@ template <typename ElementType,
 using managed_mdspan = mdspan<ElementType, Extents, LayoutPolicy, managed_accessor<AccessorPolicy>>;
 
 template <typename T, bool B>
-struct is_device_mdspan : std::false_type {
-};
+struct is_device_mdspan : std::false_type {};
 template <typename T>
-struct is_device_mdspan<T, true> : std::bool_constant<T::accessor_type::is_device_accessible> {
-};
+struct is_device_mdspan<T, true> : std::bool_constant<T::accessor_type::is_device_accessible> {};
 
 /**
  * @\brief Boolean to determine if template type T is either raft::device_mdspan or a derived type
@@ -64,11 +62,9 @@ template <typename T>
 using is_output_device_mdspan_t = is_device_mdspan<T, is_output_mdspan_v<T>>;
 
 template <typename T, bool B>
-struct is_managed_mdspan : std::false_type {
-};
+struct is_managed_mdspan : std::false_type {};
 template <typename T>
-struct is_managed_mdspan<T, true> : std::bool_constant<T::accessor_type::is_managed_accessible> {
-};
+struct is_managed_mdspan<T, true> : std::bool_constant<T::accessor_type::is_managed_accessible> {};
 
 /**
  * @\brief Boolean to determine if template type T is either raft::managed_mdspan or a derived type
diff --git a/cpp/include/raft/core/device_resources.hpp b/cpp/include/raft/core/device_resources.hpp
index df6b39a368..1cab36561a 100644
--- a/cpp/include/raft/core/device_resources.hpp
+++ b/cpp/include/raft/core/device_resources.hpp
@@ -69,7 +69,7 @@ class device_resources : public resources {
   }
 
   device_resources(const device_resources& handle) : resources{handle} {}
-  device_resources(device_resources&&) = delete;
+  device_resources(device_resources&&)            = delete;
   device_resources& operator=(device_resources&&) = delete;
 
   /**
@@ -246,7 +246,7 @@ class stream_syncer {
     handle_.sync_stream_pool();
   }
 
-  stream_syncer(const stream_syncer& other) = delete;
+  stream_syncer(const stream_syncer& other)            = delete;
   stream_syncer& operator=(const stream_syncer& other) = delete;
 
  private:
diff --git a/cpp/include/raft/core/handle.hpp b/cpp/include/raft/core/handle.hpp
index 02efebec9e..2a6b5657e2 100644
--- a/cpp/include/raft/core/handle.hpp
+++ b/cpp/include/raft/core/handle.hpp
@@ -39,7 +39,7 @@ class handle_t : public raft::device_resources {
 
   handle_t(const handle_t& handle) : device_resources{handle} {}
 
-  handle_t(handle_t&&) = delete;
+  handle_t(handle_t&&)            = delete;
   handle_t& operator=(handle_t&&) = delete;
 
   /**
diff --git a/cpp/include/raft/core/host_coo_matrix.hpp b/cpp/include/raft/core/host_coo_matrix.hpp
index 8fabf5aa95..32e7a9e3c4 100644
--- a/cpp/include/raft/core/host_coo_matrix.hpp
+++ b/cpp/include/raft/core/host_coo_matrix.hpp
@@ -78,8 +78,7 @@ template <typename RowType, typename ColType, typename NZType>
 using host_coordinate_structure_view = coordinate_structure_view<RowType, ColType, NZType, false>;
 
 template <typename T>
-struct is_host_coo_matrix : std::false_type {
-};
+struct is_host_coo_matrix : std::false_type {};
 
 template <typename ElementType,
           typename RowType,
@@ -90,8 +89,7 @@ template <typename ElementType,
           SparsityType sparsity_type>
 struct is_host_coo_matrix<
   host_coo_matrix<ElementType, RowType, ColType, NZType, ContainerPolicy, sparsity_type>>
-  : std::true_type {
-};
+  : std::true_type {};
 
 template <typename T>
 constexpr bool is_host_coo_matrix_v = is_host_coo_matrix<T>::value;
diff --git a/cpp/include/raft/core/host_csr_matrix.hpp b/cpp/include/raft/core/host_csr_matrix.hpp
index c64bcdcea6..86199335f2 100644
--- a/cpp/include/raft/core/host_csr_matrix.hpp
+++ b/cpp/include/raft/core/host_csr_matrix.hpp
@@ -45,8 +45,7 @@ using host_sparsity_owning_csr_matrix =
   csr_matrix<ElementType, IndptrType, IndicesType, NZType, false, ContainerPolicy>;
 
 template <typename T>
-struct is_host_csr_matrix : std::false_type {
-};
+struct is_host_csr_matrix : std::false_type {};
 
 template <typename ElementType,
           typename IndptrType,
@@ -57,8 +56,7 @@ template <typename ElementType,
           SparsityType sparsity_type>
 struct is_host_csr_matrix<
   host_csr_matrix<ElementType, IndptrType, IndicesType, NZType, ContainerPolicy, sparsity_type>>
-  : std::true_type {
-};
+  : std::true_type {};
 
 template <typename T>
 constexpr bool is_host_csr_matrix_v = is_host_csr_matrix<T>::value;
diff --git a/cpp/include/raft/core/host_mdspan.hpp b/cpp/include/raft/core/host_mdspan.hpp
index a6cdec7a84..9a675680ac 100644
--- a/cpp/include/raft/core/host_mdspan.hpp
+++ b/cpp/include/raft/core/host_mdspan.hpp
@@ -37,11 +37,9 @@ template <typename ElementType,
 using host_mdspan = mdspan<ElementType, Extents, LayoutPolicy, host_accessor<AccessorPolicy>>;
 
 template <typename T, bool B>
-struct is_host_mdspan : std::false_type {
-};
+struct is_host_mdspan : std::false_type {};
 template <typename T>
-struct is_host_mdspan<T, true> : std::bool_constant<T::accessor_type::is_host_accessible> {
-};
+struct is_host_mdspan<T, true> : std::bool_constant<T::accessor_type::is_host_accessible> {};
 
 /**
  * @\brief Boolean to determine if template type T is either raft::host_mdspan or a derived type
diff --git a/cpp/include/raft/core/interruptible.hpp b/cpp/include/raft/core/interruptible.hpp
index 0cc4af2bbf..62e481a801 100644
--- a/cpp/include/raft/core/interruptible.hpp
+++ b/cpp/include/raft/core/interruptible.hpp
@@ -172,10 +172,10 @@ class interruptible {
   inline void cancel() noexcept { continue_.clear(std::memory_order_relaxed); }
 
   // don't allow the token to leave the shared_ptr
-  interruptible(interruptible const&) = delete;
-  interruptible(interruptible&&)      = delete;
+  interruptible(interruptible const&)                    = delete;
+  interruptible(interruptible&&)                         = delete;
   auto operator=(interruptible const&) -> interruptible& = delete;
-  auto operator=(interruptible&&) -> interruptible& = delete;
+  auto operator=(interruptible&&) -> interruptible&      = delete;
 
  private:
   /** Global registry of thread-local cancellation stores. */
diff --git a/cpp/include/raft/core/kvp.hpp b/cpp/include/raft/core/kvp.hpp
index 192d160d45..2e0d1117a1 100644
--- a/cpp/include/raft/core/kvp.hpp
+++ b/cpp/include/raft/core/kvp.hpp
@@ -32,8 +32,8 @@ struct KeyValuePair {
   typedef _Key Key;      ///< Key data type
   typedef _Value Value;  ///< Value data type
 
-  Key key;      ///< Item key
-  Value value;  ///< Item value
+  Key key;               ///< Item key
+  Value value;           ///< Item value
 
   /// Constructor
   RAFT_INLINE_FUNCTION KeyValuePair() {}
diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp
index 88f90485dd..e1209835c9 100644
--- a/cpp/include/raft/core/mdarray.hpp
+++ b/cpp/include/raft/core/mdarray.hpp
@@ -55,12 +55,10 @@ class array_interface {
 
 namespace detail {
 template <typename T, typename = void>
-struct is_array_interface : std::false_type {
-};
+struct is_array_interface : std::false_type {};
 template <typename T>
 struct is_array_interface<T, std::void_t<decltype(std::declval<T>().view())>>
-  : std::bool_constant<is_mdspan_v<decltype(std::declval<T>().view())>> {
-};
+  : std::bool_constant<is_mdspan_v<decltype(std::declval<T>().view())>> {};
 
 template <typename T>
 using is_array_interface_t = is_array_interface<std::remove_const_t<T>>;
@@ -75,16 +73,13 @@ inline constexpr bool is_array_interface_v = is_array_interface<std::remove_cons
 }  // namespace detail
 
 template <typename...>
-struct is_array_interface : std::true_type {
-};
+struct is_array_interface : std::true_type {};
 template <typename T1>
-struct is_array_interface<T1> : detail::is_array_interface_t<T1> {
-};
+struct is_array_interface<T1> : detail::is_array_interface_t<T1> {};
 template <typename T1, typename... Tn>
 struct is_array_interface<T1, Tn...> : std::conditional_t<detail::is_array_interface_v<T1>,
                                                           is_array_interface<Tn...>,
-                                                          std::false_type> {
-};
+                                                          std::false_type> {};
 /**
  * @\brief Boolean to determine if variadic template types Tn are raft::array_interface
  *         or derived type or any type that has a member function `view()` that returns either
@@ -177,9 +172,9 @@ class mdarray
   constexpr mdarray(mdarray&&) noexcept(std::is_nothrow_move_constructible<container_type>::value) =
     default;
 
-  constexpr auto operator                                               =(mdarray const&) noexcept(
+  constexpr auto operator=(mdarray const&) noexcept(
     std::is_nothrow_copy_assignable<container_type>::value) -> mdarray& = default;
-  constexpr auto operator                                               =(mdarray&&) noexcept(
+  constexpr auto operator=(mdarray&&) noexcept(
     std::is_nothrow_move_assignable<container_type>::value) -> mdarray& = default;
 
   ~mdarray() noexcept(std::is_nothrow_destructible<container_type>::value) = default;
diff --git a/cpp/include/raft/core/mdspan.hpp b/cpp/include/raft/core/mdspan.hpp
index 1c69cdd973..cd9ca26ed9 100644
--- a/cpp/include/raft/core/mdspan.hpp
+++ b/cpp/include/raft/core/mdspan.hpp
@@ -85,28 +85,22 @@ template <typename ElementType, typename Extents, typename LayoutPolicy, typenam
 void __takes_an_mdspan_ptr(mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>*);
 
 template <typename T, typename = void>
-struct is_mdspan : std::false_type {
-};
+struct is_mdspan : std::false_type {};
 template <typename T>
 struct is_mdspan<T, std::void_t<decltype(__takes_an_mdspan_ptr(std::declval<T*>()))>>
-  : std::true_type {
-};
+  : std::true_type {};
 
 template <typename T, typename = void>
-struct is_input_mdspan : std::false_type {
-};
+struct is_input_mdspan : std::false_type {};
 template <typename T>
 struct is_input_mdspan<T, std::void_t<decltype(__takes_an_mdspan_ptr(std::declval<T*>()))>>
-  : std::bool_constant<std::is_const_v<typename T::element_type>> {
-};
+  : std::bool_constant<std::is_const_v<typename T::element_type>> {};
 
 template <typename T, typename = void>
-struct is_output_mdspan : std::false_type {
-};
+struct is_output_mdspan : std::false_type {};
 template <typename T>
 struct is_output_mdspan<T, std::void_t<decltype(__takes_an_mdspan_ptr(std::declval<T*>()))>>
-  : std::bool_constant<not std::is_const_v<typename T::element_type>> {
-};
+  : std::bool_constant<not std::is_const_v<typename T::element_type>> {};
 
 template <typename T>
 using is_mdspan_t = is_mdspan<std::remove_const_t<T>>;
diff --git a/cpp/include/raft/core/nvtx.hpp b/cpp/include/raft/core/nvtx.hpp
index 09a41f10a6..57338c32c7 100644
--- a/cpp/include/raft/core/nvtx.hpp
+++ b/cpp/include/raft/core/nvtx.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -144,9 +144,9 @@ class range {
   ~range() { pop_range<Domain>(); }
 
   /* This object is not meant to be touched. */
-  range(const range&) = delete;
-  range(range&&)      = delete;
-  auto operator=(const range&) -> range& = delete;
+  range(const range&)                              = delete;
+  range(range&&)                                   = delete;
+  auto operator=(const range&) -> range&           = delete;
   auto operator=(range&&) -> range&                = delete;
   static auto operator new(std::size_t) -> void*   = delete;
   static auto operator new[](std::size_t) -> void* = delete;
diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp
index 8e331293bf..2dc4eb1f9d 100644
--- a/cpp/include/raft/core/resource/resource_types.hpp
+++ b/cpp/include/raft/core/resource/resource_types.hpp
@@ -42,7 +42,7 @@ enum resource_type {
   THRUST_POLICY,           // thrust execution policy
   WORKSPACE_RESOURCE,      // rmm device memory resource
 
-  LAST_KEY  // reserved for the last key
+  LAST_KEY                 // reserved for the last key
 };
 
 /**
diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp
index 64e281e934..4de7d43e76 100644
--- a/cpp/include/raft/core/resources.hpp
+++ b/cpp/include/raft/core/resources.hpp
@@ -67,7 +67,7 @@ class resources {
    * Note that this does not create any new resources.
    */
   resources(const resources& res) : factories_(res.factories_), resources_(res.resources_) {}
-  resources(resources&&) = delete;
+  resources(resources&&)            = delete;
   resources& operator=(resources&&) = delete;
 
   /**
diff --git a/cpp/include/raft/core/span.hpp b/cpp/include/raft/core/span.hpp
index 188d58c896..a896ba1977 100644
--- a/cpp/include/raft/core/span.hpp
+++ b/cpp/include/raft/core/span.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -104,7 +104,7 @@ class span {
   constexpr span(span&& other) noexcept      = default;
 
   constexpr auto operator=(span const& other) noexcept -> span& = default;
-  constexpr auto operator=(span&& other) noexcept -> span& = default;
+  constexpr auto operator=(span&& other) noexcept -> span&      = default;
 
   constexpr auto begin() const noexcept -> iterator { return data(); }
 
diff --git a/cpp/include/raft/core/temporary_device_buffer.hpp b/cpp/include/raft/core/temporary_device_buffer.hpp
index 194471c5de..4baa7e9597 100644
--- a/cpp/include/raft/core/temporary_device_buffer.hpp
+++ b/cpp/include/raft/core/temporary_device_buffer.hpp
@@ -55,10 +55,10 @@ class temporary_device_buffer {
   static constexpr bool is_const_pointer_ = std::is_const_v<ElementType>;
 
  public:
-  temporary_device_buffer(temporary_device_buffer const&) = delete;
+  temporary_device_buffer(temporary_device_buffer const&)            = delete;
   temporary_device_buffer& operator=(temporary_device_buffer const&) = delete;
 
-  constexpr temporary_device_buffer(temporary_device_buffer&&) = default;
+  constexpr temporary_device_buffer(temporary_device_buffer&&)            = default;
   constexpr temporary_device_buffer& operator=(temporary_device_buffer&&) = default;
 
   /**
diff --git a/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh b/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh
index 7a4fe0ce83..68e843c6f5 100644
--- a/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh
+++ b/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh
@@ -30,13 +30,11 @@ namespace raft::distance::detail::ops {
 // This pattern is described in:
 // https://en.cppreference.com/w/cpp/types/void_t
 template <typename, typename = void>
-struct has_cutlass_op : std::false_type {
-};
+struct has_cutlass_op : std::false_type {};
 
 // Specialization recognizes types that do support CUTLASS
 template <typename T>
 struct has_cutlass_op<T, std::void_t<decltype(std::declval<T>().get_cutlass_op())>>
-  : std::true_type {
-};
+  : std::true_type {};
 
 }  // namespace raft::distance::detail::ops
diff --git a/cpp/include/raft/distance/detail/masked_distance_base.cuh b/cpp/include/raft/distance/detail/masked_distance_base.cuh
index 55da634145..5a33c9ce4a 100644
--- a/cpp/include/raft/distance/detail/masked_distance_base.cuh
+++ b/cpp/include/raft/distance/detail/masked_distance_base.cuh
@@ -217,7 +217,7 @@ struct MaskedDistances : public BaseClass {
         }  // tile_idx_n
       }    // idx_g
       rowEpilog_op(tile_idx_m);
-    }  // tile_idx_m
+    }      // tile_idx_m
   }
 
  private:
diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh
index c6b09be31e..58b5daa8ca 100644
--- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh
+++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh
@@ -18,7 +18,7 @@
 #include <raft/util/cuda_dev_essentials.cuh>  // ceildiv
 #include <raft/util/cuda_rt_essentials.hpp>   // RAFT_CUDA_TRY
 
-#include <cstddef>  // size_t
+#include <cstddef>                            // size_t
 
 namespace raft {
 namespace distance {
diff --git a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
index 67c01448dc..ebe6d0c80a 100644
--- a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
+++ b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -57,8 +57,8 @@ namespace threadblock {
 ///
 /// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator
 ///
-template <typename ThreadMap_,  ///< Thread map (conept: OutputTileThreadMap)
-          typename Element_,    ///< Element data type
+template <typename ThreadMap_,        ///< Thread map (conept: OutputTileThreadMap)
+          typename Element_,          ///< Element data type
           typename Layout_,
           bool ScatterD     = false,  ///< Scatter D operand or not
           bool UseCUDAStore = false>
diff --git a/cpp/include/raft/lap/lap.cuh b/cpp/include/raft/lap/lap.cuh
index ca7d5e96a9..f7828294cd 100644
--- a/cpp/include/raft/lap/lap.cuh
+++ b/cpp/include/raft/lap/lap.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/solver version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/solver version instead.")
 
 #include <raft/solver/linear_assignment.cuh>
 
diff --git a/cpp/include/raft/lap/lap.hpp b/cpp/include/raft/lap/lap.hpp
index 30f2b53e52..5472422053 100644
--- a/cpp/include/raft/lap/lap.hpp
+++ b/cpp/include/raft/lap/lap.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,8 +24,8 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the cuh version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the cuh version instead.")
 
 #include <raft/solver/linear_assignment.cuh>
diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh
index 608c63e1a9..c19f491319 100644
--- a/cpp/include/raft/linalg/add.cuh
+++ b/cpp/include/raft/linalg/add.cuh
@@ -216,7 +216,7 @@ void add_scalar(raft::device_resources const& handle,
 
 /** @} */  // end of group add
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh
index ed083a1590..88c49d1f42 100644
--- a/cpp/include/raft/linalg/binary_op.cuh
+++ b/cpp/include/raft/linalg/binary_op.cuh
@@ -82,7 +82,7 @@ void binary_op(raft::device_resources const& handle, InType in1, InType in2, Out
 
 /** @} */  // end of group binary_op
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh
index 674be207d8..48c121c359 100644
--- a/cpp/include/raft/linalg/coalesced_reduction.cuh
+++ b/cpp/include/raft/linalg/coalesced_reduction.cuh
@@ -159,7 +159,7 @@ void coalesced_reduction(raft::device_resources const& handle,
 
 /** @} */  // end of group coalesced_reduction
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh
index 4321e13d95..3b1e8c41c4 100644
--- a/cpp/include/raft/linalg/contractions.cuh
+++ b/cpp/include/raft/linalg/contractions.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -100,7 +100,7 @@ struct KernelPolicy {
     SmemSize = 2 * SmemPage * sizeof(DataT),
   };  // enum
 
-};  // struct KernelPolicy
+};    // struct KernelPolicy
 
 template <typename DataT, int _veclen, int _kblk, int _rpt, int _cpt, int _tr, int _tc>
 struct ColKernelPolicy {
@@ -151,8 +151,7 @@ struct ColKernelPolicy {
  * @{
  */
 template <typename DataT, int _veclen>
-struct Policy4x4 {
-};
+struct Policy4x4 {};
 
 template <int _veclen>
 struct Policy4x4<float, _veclen> {
@@ -174,8 +173,7 @@ struct Policy4x4<double, _veclen> {
  *
  */
 template <typename DataT, int _veclen>
-struct Policy4x4Skinny {
-};
+struct Policy4x4Skinny {};
 
 template <int _veclen>
 struct Policy4x4Skinny<float, _veclen> {
@@ -194,8 +192,7 @@ struct Policy4x4Skinny<double, _veclen> {
  * @{
  */
 template <typename DataT, int _veclen = 1>
-struct Policy2x8 {
-};
+struct Policy2x8 {};
 
 template <int _veclen>
 struct Policy2x8<float, _veclen> {
diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
index 87a195757c..5a7356a4c2 100644
--- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
+++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp
@@ -41,9 +41,9 @@ class cublas_device_pointer_mode {
     }
   }
   auto operator=(const cublas_device_pointer_mode&) -> cublas_device_pointer_mode& = delete;
-  auto operator=(cublas_device_pointer_mode&&) -> cublas_device_pointer_mode& = delete;
-  static auto operator new(std::size_t) -> void*                              = delete;
-  static auto operator new[](std::size_t) -> void*                            = delete;
+  auto operator=(cublas_device_pointer_mode&&) -> cublas_device_pointer_mode&      = delete;
+  static auto operator new(std::size_t) -> void*                                   = delete;
+  static auto operator new[](std::size_t) -> void*                                 = delete;
 
   ~cublas_device_pointer_mode()
   {
@@ -550,7 +550,7 @@ cublasStatus_t cublasgetrfBatched(cublasHandle_t handle,
 template <>
 inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle,  // NOLINT
                                          int n,
-                                         float* const A[],  // NOLINT
+                                         float* const A[],       // NOLINT
                                          int lda,
                                          int* P,
                                          int* info,
@@ -564,7 +564,7 @@ inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle,  // NOLINT
 template <>
 inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle,  // NOLINT
                                          int n,
-                                         double* const A[],  // NOLINT
+                                         double* const A[],      // NOLINT
                                          int lda,
                                          int* P,
                                          int* info,
diff --git a/cpp/include/raft/linalg/detail/map_then_reduce.cuh b/cpp/include/raft/linalg/detail/map_then_reduce.cuh
index 70bb2df4f5..c22ef09809 100644
--- a/cpp/include/raft/linalg/detail/map_then_reduce.cuh
+++ b/cpp/include/raft/linalg/detail/map_then_reduce.cuh
@@ -25,8 +25,7 @@ namespace raft {
 namespace linalg {
 namespace detail {
 
-struct sum_tag {
-};
+struct sum_tag {};
 
 template <typename InType, typename OutType, int TPB>
 __device__ void reduce(OutType* out, const InType acc, sum_tag)
diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh
index 0b18e6175c..428b9ba618 100644
--- a/cpp/include/raft/linalg/divide.cuh
+++ b/cpp/include/raft/linalg/divide.cuh
@@ -95,7 +95,7 @@ void divide_scalar(raft::device_resources const& handle,
 
 /** @} */  // end of group add
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh
index 03e94a10b1..7829f8e49f 100644
--- a/cpp/include/raft/linalg/eig.cuh
+++ b/cpp/include/raft/linalg/eig.cuh
@@ -219,7 +219,7 @@ void eig_jacobi(raft::device_resources const& handle,
 
 /** @} */  // end of eig
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh
index 96846003f6..019ec9f7ac 100644
--- a/cpp/include/raft/linalg/gemv.cuh
+++ b/cpp/include/raft/linalg/gemv.cuh
@@ -304,6 +304,6 @@ void gemv(raft::device_resources const& handle,
 }
 /** @} */  // end of gemv
 
-};  // namespace linalg
-};  // namespace raft
+};         // namespace linalg
+};         // namespace raft
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/lanczos.cuh b/cpp/include/raft/linalg/lanczos.cuh
index c9f3e0010e..04e9980583 100644
--- a/cpp/include/raft/linalg/lanczos.cuh
+++ b/cpp/include/raft/linalg/lanczos.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the sparse solvers version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the sparse solvers version instead.")
 
 #include <raft/sparse/solver/lanczos.cuh>
 
diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh
index b36a9eba96..c753215737 100644
--- a/cpp/include/raft/linalg/lstsq.cuh
+++ b/cpp/include/raft/linalg/lstsq.cuh
@@ -244,7 +244,7 @@ void lstsq_qr(raft::device_resources const& handle,
 
 /** @} */  // end of lstsq
 
-};  // namespace linalg
-};  // namespace raft
+};         // namespace linalg
+};         // namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh
index 59b2ca5ee5..6c65626ac5 100644
--- a/cpp/include/raft/linalg/matrix_vector_op.cuh
+++ b/cpp/include/raft/linalg/matrix_vector_op.cuh
@@ -238,7 +238,7 @@ void matrix_vector_op(raft::device_resources const& handle,
 
 /** @} */  // end of group matrix_vector_op
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh
index 62f4896d01..317c085673 100644
--- a/cpp/include/raft/linalg/mean_squared_error.cuh
+++ b/cpp/include/raft/linalg/mean_squared_error.cuh
@@ -74,7 +74,7 @@ void mean_squared_error(raft::device_resources const& handle,
 
 /** @} */  // end of group mean_squared_error
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh
index 574b88c63d..bdca641616 100644
--- a/cpp/include/raft/linalg/multiply.cuh
+++ b/cpp/include/raft/linalg/multiply.cuh
@@ -97,7 +97,7 @@ void multiply_scalar(
 
 /** @} */  // end of group multiply
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh
index 1fdfcb3780..057d6f6827 100644
--- a/cpp/include/raft/linalg/power.cuh
+++ b/cpp/include/raft/linalg/power.cuh
@@ -153,7 +153,7 @@ void power_scalar(
 
 /** @} */  // end of group add
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh
index ae5457c44f..06f62f207e 100644
--- a/cpp/include/raft/linalg/reduce.cuh
+++ b/cpp/include/raft/linalg/reduce.cuh
@@ -161,7 +161,7 @@ void reduce(raft::device_resources const& handle,
 
 /** @} */  // end of group reduction
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
index 2b744d8134..71c8cf14a1 100644
--- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
@@ -112,7 +112,7 @@ void reduce_cols_by_key(
 
 /** @} */  // end of group reduce_cols_by_key
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
index 484b60238b..0e83c9aa2b 100644
--- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
@@ -191,7 +191,7 @@ void reduce_rows_by_key(
 
 /** @} */  // end of group reduce_rows_by_key
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh
index eb94547f13..8a32467873 100644
--- a/cpp/include/raft/linalg/rsvd.cuh
+++ b/cpp/include/raft/linalg/rsvd.cuh
@@ -765,7 +765,7 @@ void rsvd_perc_symmetric_jacobi(Args... args)
 
 /** @} */  // end of group rsvd
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh
index 55e661897d..eecc719617 100644
--- a/cpp/include/raft/linalg/sqrt.cuh
+++ b/cpp/include/raft/linalg/sqrt.cuh
@@ -83,7 +83,7 @@ void sqrt(raft::device_resources const& handle, InType in, OutType out)
 
 /** @} */  // end of group add
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh
index f58dfe28b3..25be368865 100644
--- a/cpp/include/raft/linalg/strided_reduction.cuh
+++ b/cpp/include/raft/linalg/strided_reduction.cuh
@@ -170,7 +170,7 @@ void strided_reduction(raft::device_resources const& handle,
 
 /** @} */  // end of group strided_reduction
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh
index da995b7a2a..cbd6b9df59 100644
--- a/cpp/include/raft/linalg/subtract.cuh
+++ b/cpp/include/raft/linalg/subtract.cuh
@@ -222,7 +222,7 @@ void subtract_scalar(
 
 /** @} */  // end of group subtract
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh
index 4b78f2ef61..801d271fe9 100644
--- a/cpp/include/raft/linalg/svd.cuh
+++ b/cpp/include/raft/linalg/svd.cuh
@@ -415,7 +415,7 @@ void svd_reconstruction(raft::device_resources const& handle,
 
 /** @} */  // end of group svd
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh
index 1e347d69be..ce95e98499 100644
--- a/cpp/include/raft/linalg/ternary_op.cuh
+++ b/cpp/include/raft/linalg/ternary_op.cuh
@@ -83,7 +83,7 @@ void ternary_op(
 
 /** @} */  // end of group ternary_op
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index a0f418b4f7..2f31cfd722 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -102,7 +102,7 @@ auto transpose(raft::device_resources const& handle,
 
 /** @} */  // end of group transpose
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh
index 23f932d2f2..58ff2f6bd6 100644
--- a/cpp/include/raft/linalg/unary_op.cuh
+++ b/cpp/include/raft/linalg/unary_op.cuh
@@ -124,7 +124,7 @@ void write_only_unary_op(const raft::device_resources& handle, OutType out, Lamb
 
 /** @} */  // end of group unary_op
 
-};  // end namespace linalg
-};  // end namespace raft
+};         // end namespace linalg
+};         // end namespace raft
 
 #endif
diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh
index a4daf097e5..6546a48279 100644
--- a/cpp/include/raft/matrix/col_wise_sort.cuh
+++ b/cpp/include/raft/matrix/col_wise_sort.cuh
@@ -133,6 +133,6 @@ void sort_cols_per_row(Args... args)
 
 /** @} */  // end of group col_wise_sort
 
-};  // end namespace raft::matrix
+};         // end namespace raft::matrix
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh
index d362b73792..93d405da48 100644
--- a/cpp/include/raft/matrix/detail/select_warpsort.cuh
+++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh
@@ -870,8 +870,7 @@ struct launch_setup {
 };
 
 template <template <int, bool, typename, typename> class WarpSortClass>
-struct LaunchThreshold {
-};
+struct LaunchThreshold {};
 
 template <>
 struct LaunchThreshold<warp_sort_filtered> {
@@ -960,7 +959,7 @@ void calc_launch_parameter(
       if (batch_size >= size_t(another_min_grid_size)  // still have enough work
           && another_block_size < block_size           // protect against an infinite loop
           && another_min_grid_size * another_block_size >
-               min_grid_size * block_size  // improve occupancy
+               min_grid_size * block_size              // improve occupancy
       ) {
         block_size    = another_block_size;
         min_grid_size = another_min_grid_size;
diff --git a/cpp/include/raft/matrix/math.cuh b/cpp/include/raft/matrix/math.cuh
index 7afb9572be..7cbc212d75 100644
--- a/cpp/include/raft/matrix/math.cuh
+++ b/cpp/include/raft/matrix/math.cuh
@@ -19,9 +19,9 @@
  * Please use versions in individual header files instead.
  */
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use versions in individual header files instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use versions in individual header files instead.")
 
 #ifndef __MATH_H
 #define __MATH_H
diff --git a/cpp/include/raft/matrix/matrix.cuh b/cpp/include/raft/matrix/matrix.cuh
index 0780e41275..4e549a4ec5 100644
--- a/cpp/include/raft/matrix/matrix.cuh
+++ b/cpp/include/raft/matrix/matrix.cuh
@@ -19,9 +19,9 @@
  * Please use versions in individual header files instead.
  */
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use versions in individual header files instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use versions in individual header files instead.")
 
 #ifndef __MATRIX_H
 #define __MATRIX_H
diff --git a/cpp/include/raft/matrix/matrix.hpp b/cpp/include/raft/matrix/matrix.hpp
index 428c914784..53bd30d2eb 100644
--- a/cpp/include/raft/matrix/matrix.hpp
+++ b/cpp/include/raft/matrix/matrix.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,8 +24,8 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the cuh version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the cuh version instead.")
 
 #include "matrix.cuh"
diff --git a/cpp/include/raft/neighbors/ann_types.hpp b/cpp/include/raft/neighbors/ann_types.hpp
index 5bf2062f2f..469d3c09d4 100644
--- a/cpp/include/raft/neighbors/ann_types.hpp
+++ b/cpp/include/raft/neighbors/ann_types.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,8 +26,7 @@ namespace raft::neighbors::ann {
  */
 
 /** The base for approximate KNN index structures. */
-struct index {
-};
+struct index {};
 
 /** The base for KNN index parameters. */
 struct index_params {
@@ -46,9 +45,8 @@ struct index_params {
   bool add_data_on_build = true;
 };
 
-struct search_params {
-};
+struct search_params {};
 
 /** @} */  // end group ann_types
 
-};  // namespace raft::neighbors::ann
+};         // namespace raft::neighbors::ann
diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp
index bd9b3b586b..931fb3f23f 100644
--- a/cpp/include/raft/neighbors/cagra_types.hpp
+++ b/cpp/include/raft/neighbors/cagra_types.hpp
@@ -155,11 +155,11 @@ struct index : ann::index {
   }
 
   // Don't allow copying the index for performance reasons (try avoiding copying data)
-  index(const index&) = delete;
-  index(index&&)      = default;
+  index(const index&)                    = delete;
+  index(index&&)                         = default;
   auto operator=(const index&) -> index& = delete;
-  auto operator=(index&&) -> index& = default;
-  ~index()                          = default;
+  auto operator=(index&&) -> index&      = default;
+  ~index()                               = default;
 
   /** Construct an empty index. */
   index(raft::device_resources const& res)
diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
index a05c714700..29c841c0b5 100644
--- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
+++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
@@ -51,7 +51,7 @@ _RAFT_DEVICE void compute_distance_to_random_nodes(
   INDEX_T* const result_indices_ptr,       // [num_pickup]
   DISTANCE_T* const result_distances_ptr,  // [num_pickup]
   const float* const query_buffer,
-  const DATA_T* const dataset_ptr,  // [dataset_size, dataset_dim]
+  const DATA_T* const dataset_ptr,         // [dataset_size, dataset_dim]
   const std::size_t dataset_dim,
   const std::size_t dataset_size,
   const std::size_t num_pickup,
diff --git a/cpp/include/raft/neighbors/detail/cagra/fragment.hpp b/cpp/include/raft/neighbors/detail/cagra/fragment.hpp
index d5ec2207e7..c423ac12c2 100644
--- a/cpp/include/raft/neighbors/detail/cagra/fragment.hpp
+++ b/cpp/include/raft/neighbors/detail/cagra/fragment.hpp
@@ -48,8 +48,7 @@ struct load_unit_t<1> {
 
 // One dataset or query vector is distributed within a warp and stored as `fragment`.
 template <int DIM, class T, unsigned TEAM_SIZE, class ENABLED>
-struct fragment_base {
-};
+struct fragment_base {};
 template <int DIM, class T, unsigned TEAM_SIZE = warp_size>
 struct fragment
   : fragment_base<DIM,
diff --git a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
index 568ad0826c..02055f2a4d 100644
--- a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
@@ -78,12 +78,12 @@ __device__ inline bool swap_if_needed(K& key1, K& key2, V& val1, V& val2, bool a
 
 template <class DATA_T, int blockDim_x, int numElementsPerThread>
 __global__ void kern_sort(
-  DATA_T** dataset,  // [num_gpus][dataset_chunk_size, dataset_dim]
+  DATA_T** dataset,             // [num_gpus][dataset_chunk_size, dataset_dim]
   uint32_t dataset_size,
   uint32_t dataset_chunk_size,  // (*) num_gpus * dataset_chunk_size >= dataset_size
   uint32_t dataset_dim,
   float scale,
-  uint32_t** knn_graph,  // [num_gpus][graph_chunk_size, graph_degree]
+  uint32_t** knn_graph,       // [num_gpus][graph_chunk_size, graph_degree]
   uint32_t graph_size,
   uint32_t graph_chunk_size,  // (*) num_gpus * graph_chunk_size >= graph_size
   uint32_t graph_degree,
@@ -211,7 +211,7 @@ __global__ void kern_sort(
 
 template <int MAX_DEGREE>
 __global__ void kern_prune(
-  uint32_t** knn_graph,  // [num_gpus][graph_chunk_size, graph_degree]
+  uint32_t** knn_graph,       // [num_gpus][graph_chunk_size, graph_degree]
   uint32_t graph_size,
   uint32_t graph_chunk_size,  // (*) num_gpus * graph_chunk_size >= graph_size
   uint32_t graph_degree,
@@ -284,8 +284,8 @@ namespace {
 __global__ void kern_make_rev_graph(const uint32_t i_gpu,
                                     const uint32_t* dest_nodes,  // [global_graph_size]
                                     const uint32_t global_graph_size,
-                                    uint32_t* rev_graph,        // [graph_size, degree]
-                                    uint32_t* rev_graph_count,  // [graph_size]
+                                    uint32_t* rev_graph,         // [graph_size, degree]
+                                    uint32_t* rev_graph_count,   // [graph_size]
                                     const uint32_t graph_size,
                                     const uint32_t degree)
 {
@@ -316,8 +316,8 @@ T*** mgpu_alloc(int n_gpus, uint32_t chunk, uint32_t nelems)
     RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
     RAFT_CUDA_TRY(cudaMalloc(&(arrays[i_gpu]), bsize)); /* d1 */
   }
-  T*** d_arrays;                                       // [n_gpus+1][n_gpus][chunk, nelems]
-  d_arrays = (T***)malloc(sizeof(T**) * (n_gpus + 1)); /* h2 */
+  T*** d_arrays;                                        // [n_gpus+1][n_gpus][chunk, nelems]
+  d_arrays = (T***)malloc(sizeof(T**) * (n_gpus + 1));  /* h2 */
   bsize    = sizeof(T*) * n_gpus;
   for (int i_gpu = 0; i_gpu < n_gpus; i_gpu++) {
     RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
@@ -705,7 +705,7 @@ void prune(raft::device_resources const& res,
   d_rev_graph_count = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, 1);
   mgpu_H2D<uint32_t>(d_rev_graph_count, rev_graph_count, num_gpus, graph_size, graph_chunk_size, 1);
 
-  uint32_t* dest_nodes;  // [graph_size]
+  uint32_t* dest_nodes;     // [graph_size]
   dest_nodes = (uint32_t*)malloc(sizeof(uint32_t) * graph_size);
   uint32_t** d_dest_nodes;  // [num_gpus][graph_size]
   d_dest_nodes = (uint32_t**)malloc(sizeof(uint32_t*) * num_gpus);
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
index 2c0ac98417..6148441bd0 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
@@ -43,7 +43,7 @@ namespace multi_cta_search {
 template <class INDEX_T>
 __device__ void pickup_next_parents(INDEX_T* const next_parent_indices,  // [num_parents]
                                     const uint32_t num_parents,
-                                    INDEX_T* const itopk_indices,  // [num_itopk]
+                                    INDEX_T* const itopk_indices,        // [num_itopk]
                                     const size_t num_itopk,
                                     uint32_t* const terminate_flag)
 {
@@ -80,8 +80,8 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices,  // [num
 }
 
 template <unsigned MAX_ELEMENTS>
-__device__ inline void topk_by_bitonic_sort(float* distances,   // [num_elements]
-                                            uint32_t* indices,  // [num_elements]
+__device__ inline void topk_by_bitonic_sort(float* distances,         // [num_elements]
+                                            uint32_t* indices,        // [num_elements]
                                             const uint32_t num_elements,
                                             const uint32_t num_itopk  // num_itopk <= num_elements
 )
@@ -137,7 +137,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
   const uint32_t graph_degree,
   const unsigned num_distilation,
   const uint64_t rand_xor_mask,
-  const INDEX_T* seed_ptr,  // [num_queries, num_seeds]
+  const INDEX_T* seed_ptr,              // [num_queries, num_seeds]
   const uint32_t num_seeds,
   uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
   const uint32_t hash_bitlen,
@@ -561,9 +561,9 @@ struct search : public search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
   void operator()(raft::device_resources const& res,
                   raft::device_matrix_view<const DATA_T, INDEX_T, row_major> dataset,
                   raft::device_matrix_view<const INDEX_T, INDEX_T, row_major> graph,
-                  INDEX_T* const topk_indices_ptr,       // [num_queries, topk]
-                  DISTANCE_T* const topk_distances_ptr,  // [num_queries, topk]
-                  const DATA_T* const queries_ptr,       // [num_queries, dataset_dim]
+                  INDEX_T* const topk_indices_ptr,          // [num_queries, topk]
+                  DISTANCE_T* const topk_distances_ptr,     // [num_queries, topk]
+                  const DATA_T* const queries_ptr,          // [num_queries, dataset_dim]
                   const uint32_t num_queries,
                   const INDEX_T* dev_seed_ptr,              // [num_queries, num_seeds]
                   uint32_t* const num_executed_iterations,  // [num_queries,]
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
index f688941239..629bed2aee 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
@@ -93,7 +93,7 @@ __global__ void random_pickup_kernel(
   const std::size_t num_pickup,
   const unsigned num_distilation,
   const uint64_t rand_xor_mask,
-  const INDEX_T* seed_ptr,  // [num_queries, num_seeds]
+  const INDEX_T* seed_ptr,                   // [num_queries, num_seeds]
   const uint32_t num_seeds,
   INDEX_T* const result_indices_ptr,         // [num_queries, ldr]
   DISTANCE_T* const result_distances_ptr,    // [num_queries, ldr]
@@ -162,7 +162,7 @@ void random_pickup(const DATA_T* const dataset_ptr,  // [dataset_size, dataset_d
                    const std::size_t num_pickup,
                    const unsigned num_distilation,
                    const uint64_t rand_xor_mask,
-                   const INDEX_T* seed_ptr,  // [num_queries, num_seeds]
+                   const INDEX_T* seed_ptr,                   // [num_queries, num_seeds]
                    const uint32_t num_seeds,
                    INDEX_T* const result_indices_ptr,         // [num_queries, ldr]
                    DISTANCE_T* const result_distances_ptr,    // [num_queries, ldr]
@@ -300,17 +300,17 @@ template <unsigned TEAM_SIZE,
 __global__ void compute_distance_to_child_nodes_kernel(
   const INDEX_T* const parent_node_list,  // [num_queries, num_parents]
   const std::uint32_t num_parents,
-  const DATA_T* const dataset_ptr,  // [dataset_size, data_dim]
+  const DATA_T* const dataset_ptr,        // [dataset_size, data_dim]
   const std::uint32_t data_dim,
   const std::uint32_t dataset_size,
-  const INDEX_T* const neighbor_graph_ptr,  // [dataset_size, graph_degree]
+  const INDEX_T* const neighbor_graph_ptr,   // [dataset_size, graph_degree]
   const std::uint32_t graph_degree,
   const DATA_T* query_ptr,                   // [num_queries, data_dim]
   std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
   const std::uint32_t hash_bitlen,
-  INDEX_T* const result_indices_ptr,       // [num_queries, ldd]
-  DISTANCE_T* const result_distances_ptr,  // [num_queries, ldd]
-  const std::uint32_t ldd                  // (*) ldd >= num_parents * graph_degree
+  INDEX_T* const result_indices_ptr,         // [num_queries, ldd]
+  DISTANCE_T* const result_distances_ptr,    // [num_queries, ldd]
+  const std::uint32_t ldd                    // (*) ldd >= num_parents * graph_degree
 )
 {
   const uint32_t ldb        = hashmap::get_size(hash_bitlen);
@@ -357,18 +357,18 @@ template <unsigned TEAM_SIZE,
 void compute_distance_to_child_nodes(
   const INDEX_T* const parent_node_list,  // [num_queries, num_parents]
   const uint32_t num_parents,
-  const DATA_T* const dataset_ptr,  // [dataset_size, data_dim]
+  const DATA_T* const dataset_ptr,        // [dataset_size, data_dim]
   const std::uint32_t data_dim,
   const std::uint32_t dataset_size,
-  const INDEX_T* const neighbor_graph_ptr,  // [dataset_size, graph_degree]
+  const INDEX_T* const neighbor_graph_ptr,   // [dataset_size, graph_degree]
   const std::uint32_t graph_degree,
-  const DATA_T* query_ptr,  // [num_queries, data_dim]
+  const DATA_T* query_ptr,                   // [num_queries, data_dim]
   const std::uint32_t num_queries,
   std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
   const std::uint32_t hash_bitlen,
-  INDEX_T* const result_indices_ptr,       // [num_queries, ldd]
-  DISTANCE_T* const result_distances_ptr,  // [num_queries, ldd]
-  const std::uint32_t ldd,                 // (*) ldd >= num_parants * graph_degree
+  INDEX_T* const result_indices_ptr,         // [num_queries, ldd]
+  DISTANCE_T* const result_distances_ptr,    // [num_queries, ldd]
+  const std::uint32_t ldd,                   // (*) ldd >= num_parents * graph_degree
   cudaStream_t cuda_stream = 0)
 {
   const auto block_size = 128;
@@ -419,7 +419,7 @@ void remove_parent_bit(const std::uint32_t num_queries,
 }
 
 template <class T>
-__global__ void batched_memcpy_kernel(T* const dst,  // [batch_size, ld_dst]
+__global__ void batched_memcpy_kernel(T* const dst,        // [batch_size, ld_dst]
                                       const uint64_t ld_dst,
                                       const T* const src,  // [batch_size, ld_src]
                                       const uint64_t ld_src,
@@ -434,7 +434,7 @@ __global__ void batched_memcpy_kernel(T* const dst,  // [batch_size, ld_dst]
 }
 
 template <class T>
-void batched_memcpy(T* const dst,  // [batch_size, ld_dst]
+void batched_memcpy(T* const dst,        // [batch_size, ld_dst]
                     const uint64_t ld_dst,
                     const T* const src,  // [batch_size, ld_src]
                     const uint64_t ld_src,
@@ -578,9 +578,9 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
   void operator()(raft::device_resources const& res,
                   raft::device_matrix_view<const DATA_T, INDEX_T, row_major> dataset,
                   raft::device_matrix_view<const INDEX_T, INDEX_T, row_major> graph,
-                  INDEX_T* const topk_indices_ptr,       // [num_queries, topk]
-                  DISTANCE_T* const topk_distances_ptr,  // [num_queries, topk]
-                  const DATA_T* const queries_ptr,       // [num_queries, dataset_dim]
+                  INDEX_T* const topk_indices_ptr,          // [num_queries, topk]
+                  DISTANCE_T* const topk_distances_ptr,     // [num_queries, topk]
+                  const DATA_T* const queries_ptr,          // [num_queries, dataset_dim]
                   const uint32_t num_queries,
                   const INDEX_T* dev_seed_ptr,              // [num_queries, num_seeds]
                   uint32_t* const num_executed_iterations,  // [num_queries,]
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
index d9613b345c..09d5e71254 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
@@ -112,7 +112,7 @@ struct search_plan_impl : public search_plan_impl_base {
                           DISTANCE_T* const result_distances_ptr,  // [num_queries, topk]
                           const DATA_T* const queries_ptr,         // [num_queries, dataset_dim]
                           const std::uint32_t num_queries,
-                          const INDEX_T* dev_seed_ptr,                   // [num_queries, num_seeds]
+                          const INDEX_T* dev_seed_ptr,             // [num_queries, num_seeds]
                           std::uint32_t* const num_executed_iterations,  // [num_queries]
                           uint32_t topk){};
 
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
index acd7ac321f..fc87b952b0 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
@@ -89,8 +89,7 @@ struct topk_by_radix_sort_base {
   static constexpr std::uint32_t vecLen           = 2;  // TODO
 };
 template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE, class = void>
-struct topk_by_radix_sort : topk_by_radix_sort_base<MAX_INTERNAL_TOPK> {
-};
+struct topk_by_radix_sort : topk_by_radix_sort_base<MAX_INTERNAL_TOPK> {};
 
 template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE>
 struct topk_by_radix_sort<MAX_INTERNAL_TOPK,
@@ -257,8 +256,8 @@ __device__ inline void topk_by_bitonic_sort_1st(
 
 template <unsigned MAX_ITOPK, unsigned MULTI_WARPS = 0>
 __device__ inline void topk_by_bitonic_sort_2nd(
-  float* itopk_distances,        // [num_itopk]
-  std::uint32_t* itopk_indices,  // [num_itopk]
+  float* itopk_distances,            // [num_itopk]
+  std::uint32_t* itopk_indices,      // [num_itopk]
   const std::uint32_t num_itopk,
   float* candidate_distances,        // [num_candidates]
   std::uint32_t* candidate_indices,  // [num_candidates]
@@ -465,8 +464,8 @@ template <unsigned MAX_ITOPK,
           unsigned MAX_CANDIDATES,
           unsigned MULTI_WARPS_1,
           unsigned MULTI_WARPS_2>
-__device__ void topk_by_bitonic_sort(float* itopk_distances,        // [num_itopk]
-                                     std::uint32_t* itopk_indices,  // [num_itopk]
+__device__ void topk_by_bitonic_sort(float* itopk_distances,            // [num_itopk]
+                                     std::uint32_t* itopk_indices,      // [num_itopk]
                                      const std::uint32_t num_itopk,
                                      float* candidate_distances,        // [num_candidates]
                                      std::uint32_t* candidate_indices,  // [num_candidates]
@@ -527,7 +526,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
   void search_kernel(INDEX_T* const result_indices_ptr,       // [num_queries, top_k]
                      DISTANCE_T* const result_distances_ptr,  // [num_queries, top_k]
                      const std::uint32_t top_k,
-                     const DATA_T* const dataset_ptr,  // [dataset_size, dataset_dim]
+                     const DATA_T* const dataset_ptr,         // [dataset_size, dataset_dim]
                      const std::size_t dataset_dim,
                      const std::size_t dataset_size,
                      const DATA_T* const queries_ptr,  // [num_queries, dataset_dim]
@@ -535,7 +534,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
                      const std::uint32_t graph_degree,
                      const unsigned num_distilation,
                      const uint64_t rand_xor_mask,
-                     const INDEX_T* seed_ptr,  // [num_queries, num_seeds]
+                     const INDEX_T* seed_ptr,                   // [num_queries, num_seeds]
                      const uint32_t num_seeds,
                      std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
                      const std::uint32_t internal_topk,
@@ -1110,9 +1109,9 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
   void operator()(raft::device_resources const& res,
                   raft::device_matrix_view<const DATA_T, INDEX_T, row_major> dataset,
                   raft::device_matrix_view<const INDEX_T, INDEX_T, row_major> graph,
-                  INDEX_T* const result_indices_ptr,       // [num_queries, topk]
-                  DISTANCE_T* const result_distances_ptr,  // [num_queries, topk]
-                  const DATA_T* const queries_ptr,         // [num_queries, dataset_dim]
+                  INDEX_T* const result_indices_ptr,             // [num_queries, topk]
+                  DISTANCE_T* const result_distances_ptr,        // [num_queries, topk]
+                  const DATA_T* const queries_ptr,               // [num_queries, dataset_dim]
                   const std::uint32_t num_queries,
                   const INDEX_T* dev_seed_ptr,                   // [num_queries, num_seeds]
                   std::uint32_t* const num_executed_iterations,  // [num_queries]
diff --git a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh
index d09478d1db..072593550e 100644
--- a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh
@@ -183,9 +183,9 @@ __device__ inline void update_histogram(int itr,
                                         uint32_t threshold,
                                         uint32_t& num_bins,
                                         uint32_t& shift,
-                                        const T* x,  // [nx,]
+                                        const T* x,        // [nx,]
                                         uint32_t nx,
-                                        uint32_t* hist,  // [num_bins]
+                                        uint32_t* hist,    // [num_bins]
                                         uint8_t* state,
                                         uint32_t* output,  // [topk]
                                         uint32_t* output_count)
@@ -760,16 +760,16 @@ __launch_bounds__(1024, 1) __global__
   void kern_topk_cta_11(uint32_t topk,
                         uint32_t size_batch,
                         uint32_t len_x,
-                        const uint32_t* _x,  // [size_batch, ld_x,]
+                        const uint32_t* _x,        // [size_batch, ld_x,]
                         uint32_t ld_x,
                         const uint32_t* _in_vals,  // [size_batch, ld_iv,]
                         uint32_t ld_iv,
-                        uint32_t* _y,  // [size_batch, ld_y,]
+                        uint32_t* _y,              // [size_batch, ld_y,]
                         uint32_t ld_y,
-                        uint32_t* _out_vals,  // [size_batch, ld_ov,]
+                        uint32_t* _out_vals,       // [size_batch, ld_ov,]
                         uint32_t ld_ov,
-                        uint8_t* _state,   // [size_batch, ...,]
-                        uint32_t* _hints,  // [size_batch,]
+                        uint8_t* _state,           // [size_batch, ...,]
+                        uint32_t* _hints,          // [size_batch,]
                         bool sort)
 {
   uint32_t i_batch = blockIdx.x;
diff --git a/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkBlock.cuh b/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkBlock.cuh
index 79e3f95be0..1f4308fa2f 100644
--- a/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkBlock.cuh
+++ b/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkBlock.cuh
@@ -199,8 +199,7 @@ template <int NumThreads,
           typename Comp,
           bool SmallerThanBlock,
           bool FullMerge>
-struct BlockMerge {
-};
+struct BlockMerge {};
 
 /// Merging lists smaller than a block
 template <int NumThreads,
diff --git a/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkWarp.cuh b/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkWarp.cuh
index 04f7f90aac..3d87e284ed 100644
--- a/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkWarp.cuh
+++ b/cpp/include/raft/neighbors/detail/faiss_select/MergeNetworkWarp.cuh
@@ -139,8 +139,7 @@ inline __device__ void warpBitonicMergeLE16(K& k, V& v)
 // Template for performing a bitonic merge of an arbitrary set of
 // registers
 template <typename K, typename V, int N, bool Dir, typename Comp, bool Low, bool Pow2>
-struct BitonicMergeStep {
-};
+struct BitonicMergeStep {};
 
 //
 // Power-of-2 merge specialization
diff --git a/cpp/include/raft/neighbors/detail/faiss_select/Select.cuh b/cpp/include/raft/neighbors/detail/faiss_select/Select.cuh
index 4aa7d68f54..fdb986ac69 100644
--- a/cpp/include/raft/neighbors/detail/faiss_select/Select.cuh
+++ b/cpp/include/raft/neighbors/detail/faiss_select/Select.cuh
@@ -25,8 +25,7 @@ template <int NumWarps,
           int NumWarpQ,
           bool Dir,
           typename Comp>
-struct FinalBlockMerge {
-};
+struct FinalBlockMerge {};
 
 template <int NumThreads, typename K, typename V, int NumWarpQ, bool Dir, typename Comp>
 struct FinalBlockMerge<1, NumThreads, K, V, NumWarpQ, Dir, Comp> {
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
index 36ceccc36f..208f7fd875 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
@@ -213,7 +213,7 @@ void select_residuals(raft::device_resources const& handle,
 template <typename T, typename IdxT>
 void flat_compute_residuals(
   raft::device_resources const& handle,
-  float* residuals,  // [n_rows, rot_dim]
+  float* residuals,                                                      // [n_rows, rot_dim]
   IdxT n_rows,
   device_matrix_view<const float, uint32_t, row_major> rotation_matrix,  // [rot_dim, dim]
   device_matrix_view<const float, uint32_t, row_major> centers,          // [n_lists, dim_ext]
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
index 9a94458748..8ddbe7fac0 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
@@ -1193,7 +1193,7 @@ struct compute_similarity {
       }
 
       {
-        if (selected_perf.occupancy <= 0.0  // no candidate yet
+        if (selected_perf.occupancy <= 0.0                 // no candidate yet
             || (selected_perf.occupancy < cur.occupancy * kTargetOccupancy &&
                 selected_perf.shmem_use >= cur.shmem_use)  // much improved occupancy
         ) {
diff --git a/cpp/include/raft/neighbors/ivf_flat_types.hpp b/cpp/include/raft/neighbors/ivf_flat_types.hpp
index 2a6aa12847..011adcffff 100644
--- a/cpp/include/raft/neighbors/ivf_flat_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_flat_types.hpp
@@ -228,11 +228,11 @@ struct index : ann::index {
   [[nodiscard]] constexpr inline auto n_lists() const noexcept -> uint32_t { return lists_.size(); }
 
   // Don't allow copying the index for performance reasons (try avoiding copying data)
-  index(const index&) = delete;
-  index(index&&)      = default;
+  index(const index&)                    = delete;
+  index(index&&)                         = default;
   auto operator=(const index&) -> index& = delete;
-  auto operator=(index&&) -> index& = default;
-  ~index()                          = default;
+  auto operator=(index&&) -> index&      = default;
+  ~index()                               = default;
 
   /** Construct an empty index. It needs to be trained and then populated. */
   index(raft::device_resources const& res,
diff --git a/cpp/include/raft/neighbors/ivf_list_types.hpp b/cpp/include/raft/neighbors/ivf_list_types.hpp
index 233775ea39..50a905c6ae 100644
--- a/cpp/include/raft/neighbors/ivf_list_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_list_types.hpp
@@ -31,9 +31,8 @@ namespace raft::neighbors::ivf {
  * `size` bound or whenever the list is allocated but not filled-in yet.
  */
 template <typename IdxT>
-constexpr static IdxT kInvalidRecord = (std::is_signed_v<IdxT> ? IdxT{0}
-                                                               : std::numeric_limits<IdxT>::max()) -
-                                       1;
+constexpr static IdxT kInvalidRecord =
+  (std::is_signed_v<IdxT> ? IdxT{0} : std::numeric_limits<IdxT>::max()) - 1;
 
 /** The data for a single IVF list. */
 template <template <typename, typename...> typename SpecT,
@@ -58,8 +57,7 @@ struct list {
 };
 
 template <typename ListT, class T = void>
-struct enable_if_valid_list {
-};
+struct enable_if_valid_list {};
 
 template <class T,
           template <typename, typename...>
diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp
index eec9238435..4d11bac42e 100644
--- a/cpp/include/raft/neighbors/ivf_pq_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp
@@ -325,11 +325,11 @@ struct index : ann::index {
   }
 
   // Don't allow copying the index for performance reasons (try avoiding copying data)
-  index(const index&) = delete;
-  index(index&&)      = default;
+  index(const index&)                    = delete;
+  index(index&&)                         = default;
   auto operator=(const index&) -> index& = delete;
-  auto operator=(index&&) -> index& = default;
-  ~index()                          = default;
+  auto operator=(index&&) -> index&      = default;
+  ~index()                               = default;
 
   /** Construct an empty index. It needs to be trained and then populated. */
   index(raft::device_resources const& handle,
diff --git a/cpp/include/raft/random/permute.cuh b/cpp/include/raft/random/permute.cuh
index f84b603549..16de1d676d 100644
--- a/cpp/include/raft/random/permute.cuh
+++ b/cpp/include/raft/random/permute.cuh
@@ -31,8 +31,7 @@ namespace raft::random {
 namespace permute_impl {
 
 template <typename T, typename InputOutputValueType, typename IdxType, typename Layout>
-struct perms_out_view {
-};
+struct perms_out_view {};
 
 template <typename InputOutputValueType, typename IdxType, typename Layout>
 struct perms_out_view<std::nullopt_t, InputOutputValueType, IdxType, Layout> {
diff --git a/cpp/include/raft/random/sample_without_replacement.cuh b/cpp/include/raft/random/sample_without_replacement.cuh
index 8998db98ae..be8bda8cd3 100644
--- a/cpp/include/raft/random/sample_without_replacement.cuh
+++ b/cpp/include/raft/random/sample_without_replacement.cuh
@@ -29,8 +29,7 @@ namespace raft::random {
 
 namespace sample_without_replacement_impl {
 template <typename T>
-struct weight_alias {
-};
+struct weight_alias {};
 
 template <>
 struct weight_alias<std::nullopt_t> {
diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index 6ae6874466..fe433d4641 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -45,15 +45,13 @@ template <
 cusparseStatus_t cusparsegthr(
   cusparseHandle_t handle, int nnz, const T* vals, T* vals_sorted, int* d_P, cudaStream_t stream)
 {
-  auto constexpr float_type = []() constexpr
-  {
+  auto constexpr float_type = []() constexpr {
     if constexpr (std::is_same_v<T, float>) {
       return CUDA_R_32F;
     } else if constexpr (std::is_same_v<T, double>) {
       return CUDA_R_64F;
     }
-  }
-  ();
+  }();
   CUSPARSE_CHECK(cusparseSetStream(handle, stream));
   auto dense_vector_descr  = cusparseDnVecDescr_t{};
   auto sparse_vector_descr = cusparseSpVecDescr_t{};
diff --git a/cpp/include/raft/sparse/hierarchy/common.h b/cpp/include/raft/sparse/hierarchy/common.h
index 3c3b92b739..01ebfd04df 100644
--- a/cpp/include/raft/sparse/hierarchy/common.h
+++ b/cpp/include/raft/sparse/hierarchy/common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,9 +20,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use raft/cluster/single_linkage_types.hpp instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use raft/cluster/single_linkage_types.hpp instead.")
 
 #include <raft/cluster/single_linkage_types.hpp>
 
diff --git a/cpp/include/raft/sparse/hierarchy/single_linkage.cuh b/cpp/include/raft/sparse/hierarchy/single_linkage.cuh
index dbf353da73..7f990ff44b 100644
--- a/cpp/include/raft/sparse/hierarchy/single_linkage.cuh
+++ b/cpp/include/raft/sparse/hierarchy/single_linkage.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,9 +20,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/cluster version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/cluster version instead.")
 
 #include <raft/cluster/single_linkage.cuh>
 #include <raft/sparse/hierarchy/common.h>
diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh
index c2a8aa4246..a5767be736 100644
--- a/cpp/include/raft/sparse/linalg/detail/norm.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -44,10 +44,10 @@ __global__ void csr_row_normalize_l1_kernel(
   // over each row and then divide the values in parallel.
   const int* ia,  // csr row ex_scan (sorted by row)
   const T* vals,
-  int nnz,  // array of values and number of non-zeros
-  int m,    // num rows in csr
+  int nnz,        // array of values and number of non-zeros
+  int m,          // num rows in csr
   T* result)
-{  // output array
+{                 // output array
 
   // row-based matrix 1 thread per row
   int row = (blockIdx.x * TPB_X) + threadIdx.x;
@@ -90,8 +90,8 @@ __global__ void csr_row_normalize_l1_kernel(
 template <int TPB_X = 64, typename T>
 void csr_row_normalize_l1(const int* ia,  // csr row ex_scan (sorted by row)
                           const T* vals,
-                          int nnz,  // array of values and number of non-zeros
-                          int m,    // num rows in csr
+                          int nnz,        // array of values and number of non-zeros
+                          int m,          // num rows in csr
                           T* result,
                           cudaStream_t stream)
 {  // output array
@@ -110,10 +110,10 @@ __global__ void csr_row_normalize_max_kernel(
   // over each row and then divide the values in parallel.
   const int* ia,  // csr row ind array (sorted by row)
   const T* vals,
-  int nnz,  // array of values and number of non-zeros
-  int m,    // num total rows in csr
+  int nnz,        // array of values and number of non-zeros
+  int m,          // num total rows in csr
   T* result)
-{  // output array
+{                 // output array
 
   // row-based matrix 1 thread per row
   int row = (blockIdx.x * TPB_X) + threadIdx.x;
@@ -158,8 +158,8 @@ __global__ void csr_row_normalize_max_kernel(
 template <int TPB_X = 64, typename T>
 void csr_row_normalize_max(const int* ia,  // csr row ind array (sorted by row)
                            const T* vals,
-                           int nnz,  // array of values and number of non-zeros
-                           int m,    // num total rows in csr
+                           int nnz,        // array of values and number of non-zeros
+                           int m,          // num total rows in csr
                            T* result,
                            cudaStream_t stream)
 {
diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
index 3be33820cc..c64acbfca6 100644
--- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
@@ -68,7 +68,7 @@ void fit_embedding(raft::device_resources const& handle,
     handle, ro, ci, vs, n, nnz};
 
   index_type neigvs       = n_components + 1;
-  index_type maxiter      = 4000;  // default reset value (when set to 0);
+  index_type maxiter      = 4000;         // default reset value (when set to 0);
   value_type tol          = 0.01;
   index_type restart_iter = 15 + neigvs;  // what cugraph is using
 
diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh
index e13fd22843..af72d0141e 100644
--- a/cpp/include/raft/sparse/linalg/norm.cuh
+++ b/cpp/include/raft/sparse/linalg/norm.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,8 +37,8 @@ namespace linalg {
 template <typename T>
 void csr_row_normalize_l1(const int* ia,  // csr row ex_scan (sorted by row)
                           const T* vals,
-                          int nnz,  // array of values and number of non-zeros
-                          int m,    // num rows in csr
+                          int nnz,        // array of values and number of non-zeros
+                          int m,          // num rows in csr
                           T* result,
                           cudaStream_t stream)
 {  // output array
@@ -58,8 +58,8 @@ void csr_row_normalize_l1(const int* ia,  // csr row ex_scan (sorted by row)
 template <typename T>
 void csr_row_normalize_max(const int* ia,  // csr row ind array (sorted by row)
                            const T* vals,
-                           int nnz,  // array of values and number of non-zeros
-                           int m,    // num total rows in csr
+                           int nnz,        // array of values and number of non-zeros
+                           int m,          // num total rows in csr
                            T* result,
                            cudaStream_t stream)
 {
diff --git a/cpp/include/raft/sparse/mst/mst.cuh b/cpp/include/raft/sparse/mst/mst.cuh
index 8f1a365f3f..eb6de1c0a1 100644
--- a/cpp/include/raft/sparse/mst/mst.cuh
+++ b/cpp/include/raft/sparse/mst/mst.cuh
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@
  */
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/sparse/solver version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/sparse/solver version instead.")
 
 #include <raft/sparse/mst/mst_solver.cuh>
 #include <raft/sparse/solver/mst.cuh>
diff --git a/cpp/include/raft/sparse/mst/mst.hpp b/cpp/include/raft/sparse/mst/mst.hpp
index 1ad053d97c..5fbd264c6f 100644
--- a/cpp/include/raft/sparse/mst/mst.hpp
+++ b/cpp/include/raft/sparse/mst/mst.hpp
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@
  */
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/sparse/solver version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/sparse/solver version instead.")
 
 #include <raft/sparse/mst/mst.cuh>
 #include <raft/sparse/mst/mst_solver.cuh>
diff --git a/cpp/include/raft/sparse/mst/mst_solver.cuh b/cpp/include/raft/sparse/mst/mst_solver.cuh
index 6af2226b99..76667396c3 100644
--- a/cpp/include/raft/sparse/mst/mst_solver.cuh
+++ b/cpp/include/raft/sparse/mst/mst_solver.cuh
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@
  */
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft/sparse/solver version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft/sparse/solver version instead.")
 
 #include <raft/sparse/solver/mst_solver.cuh>
 
diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
index f9f07c13ca..6649c10c47 100644
--- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh
+++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
@@ -58,7 +58,7 @@ struct csr_batcher_t {
   void set_batch(int batch_num)
   {
     batch_start_ = batch_num * batch_size_;
-    batch_stop_  = batch_start_ + batch_size_ - 1;  // zero-based indexing
+    batch_stop_  = batch_start_ + batch_size_ - 1;                  // zero-based indexing
 
     if (batch_stop_ >= total_rows_) batch_stop_ = total_rows_ - 1;  // zero-based indexing
 
diff --git a/cpp/include/raft/sparse/neighbors/knn.cuh b/cpp/include/raft/sparse/neighbors/knn.cuh
index d5714fbbd1..1e8ce48e16 100644
--- a/cpp/include/raft/sparse/neighbors/knn.cuh
+++ b/cpp/include/raft/sparse/neighbors/knn.cuh
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the sparse/spatial version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the sparse/spatial version instead.")
 
 #include <raft/sparse/neighbors/brute_force.cuh>
 
diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh
index c4479bc451..9bc3f1553a 100644
--- a/cpp/include/raft/sparse/selection/connect_components.cuh
+++ b/cpp/include/raft/sparse/selection/connect_components.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the sparse/spatial version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the sparse/spatial version instead.")
 
 #include <raft/sparse/neighbors/connect_components.cuh>
 
diff --git a/cpp/include/raft/sparse/selection/knn.cuh b/cpp/include/raft/sparse/selection/knn.cuh
index c5b6a7ab2f..0258335941 100644
--- a/cpp/include/raft/sparse/selection/knn.cuh
+++ b/cpp/include/raft/sparse/selection/knn.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the sparse/spatial version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the sparse/spatial version instead.")
 
 #include <raft/sparse/neighbors/knn.cuh>
 
diff --git a/cpp/include/raft/sparse/selection/knn_graph.cuh b/cpp/include/raft/sparse/selection/knn_graph.cuh
index bd009bf297..942213e6c1 100644
--- a/cpp/include/raft/sparse/selection/knn_graph.cuh
+++ b/cpp/include/raft/sparse/selection/knn_graph.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the sparse/spatial version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the sparse/spatial version instead.")
 
 #include <raft/sparse/neighbors/knn_graph.cuh>
 
diff --git a/cpp/include/raft/sparse/solver/mst_solver.cuh b/cpp/include/raft/sparse/solver/mst_solver.cuh
index c10d7caf59..063f215fc8 100644
--- a/cpp/include/raft/sparse/solver/mst_solver.cuh
+++ b/cpp/include/raft/sparse/solver/mst_solver.cuh
@@ -78,10 +78,10 @@ class MST_solver {
   rmm::device_uvector<alteration_t> altered_weights;  // weights to be used for mst
   rmm::device_scalar<edge_t> mst_edge_count;  // total number of edges added after every iteration
   rmm::device_scalar<edge_t>
-    prev_mst_edge_count;                     // total number of edges up to the previous iteration
-  rmm::device_uvector<bool> mst_edge;        // mst output -  true if the edge belongs in mst
-  rmm::device_uvector<vertex_t> next_color;  //  next iteration color
-  rmm::device_uvector<vertex_t> color;       // index of color that vertex points to
+    prev_mst_edge_count;                      // total number of edges up to the previous iteration
+  rmm::device_uvector<bool> mst_edge;         // mst output -  true if the edge belongs in mst
+  rmm::device_uvector<vertex_t> next_color;   //  next iteration color
+  rmm::device_uvector<vertex_t> color;        // index of color that vertex points to
 
   // new src-dst pairs found per iteration
   rmm::device_uvector<vertex_t> temp_src;
diff --git a/cpp/include/raft/spatial/knn/ann_common.h b/cpp/include/raft/spatial/knn/ann_common.h
index 0e9e323b84..73a70cbefe 100644
--- a/cpp/include/raft/spatial/knn/ann_common.h
+++ b/cpp/include/raft/spatial/knn/ann_common.h
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the other approximate KNN implementations defined in spatial/knn/*.")
+#pragma message(                                              \
+    __FILE__                                                  \
+    " is deprecated and will be removed in a future release." \
+    " Please use the other approximate KNN implementations defined in spatial/knn/*.")
 
 #pragma once
 
@@ -78,8 +79,7 @@ struct IVFParam : knnIndexParam {
   int nprobe;
 };
 
-struct IVFFlatParam : IVFParam {
-};
+struct IVFFlatParam : IVFParam {};
 
 struct IVFPQParam : IVFParam {
   int M;
diff --git a/cpp/include/raft/spatial/knn/ann_types.hpp b/cpp/include/raft/spatial/knn/ann_types.hpp
index 6e9a00bc0c..42ef2292f8 100644
--- a/cpp/include/raft/spatial/knn/ann_types.hpp
+++ b/cpp/include/raft/spatial/knn/ann_types.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,8 +21,7 @@
 namespace raft::spatial::knn {
 
 /** The base for approximate KNN index structures. */
-struct index {
-};
+struct index {};
 
 /** The base for KNN index parameters. */
 struct index_params {
@@ -41,7 +40,6 @@ struct index_params {
   bool add_data_on_build = true;
 };
 
-struct search_params {
-};
+struct search_params {};
 
 };  // namespace raft::spatial::knn
diff --git a/cpp/include/raft/spatial/knn/ball_cover.cuh b/cpp/include/raft/spatial/knn/ball_cover.cuh
index dda353e1c6..f3b1123fa2 100644
--- a/cpp/include/raft/spatial/knn/ball_cover.cuh
+++ b/cpp/include/raft/spatial/knn/ball_cover.cuh
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft::neighbors version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft::neighbors version instead.")
 
 #include <raft/neighbors/ball_cover.cuh>
 #include <raft/spatial/knn/ball_cover_types.hpp>
diff --git a/cpp/include/raft/spatial/knn/ball_cover_types.hpp b/cpp/include/raft/spatial/knn/ball_cover_types.hpp
index 6ebdcd7877..31062ff364 100644
--- a/cpp/include/raft/spatial/knn/ball_cover_types.hpp
+++ b/cpp/include/raft/spatial/knn/ball_cover_types.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft::neighbors version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft::neighbors version instead.")
 
 #include <raft/neighbors/ball_cover_types.hpp>
 
diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
index 395714a161..e489f24242 100644
--- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
@@ -45,8 +45,7 @@ enum class pointer_residency {
 };
 
 template <typename... Types>
-struct pointer_residency_count {
-};
+struct pointer_residency_count {};
 
 template <>
 struct pointer_residency_count<> {
@@ -136,8 +135,7 @@ struct with_mapped_memory_t {
 };
 
 template <typename T>
-struct config {
-};
+struct config {};
 
 template <>
 struct config<float> {
diff --git a/cpp/include/raft/spatial/knn/epsilon_neighborhood.cuh b/cpp/include/raft/spatial/knn/epsilon_neighborhood.cuh
index e0a63ee42a..d516743115 100644
--- a/cpp/include/raft/spatial/knn/epsilon_neighborhood.cuh
+++ b/cpp/include/raft/spatial/knn/epsilon_neighborhood.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft::neighbors version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft::neighbors version instead.")
 
 #include <raft/neighbors/epsilon_neighborhood.cuh>
 
diff --git a/cpp/include/raft/spatial/knn/ivf_flat.cuh b/cpp/include/raft/spatial/knn/ivf_flat.cuh
index 92fe49be98..e63dcff475 100644
--- a/cpp/include/raft/spatial/knn/ivf_flat.cuh
+++ b/cpp/include/raft/spatial/knn/ivf_flat.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft::neighbors version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft::neighbors version instead.")
 
 #include <raft/neighbors/ivf_flat.cuh>
 
diff --git a/cpp/include/raft/spatial/knn/ivf_flat_types.hpp b/cpp/include/raft/spatial/knn/ivf_flat_types.hpp
index 75d777573f..9546e62be0 100644
--- a/cpp/include/raft/spatial/knn/ivf_flat_types.hpp
+++ b/cpp/include/raft/spatial/knn/ivf_flat_types.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft::neighbors version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft::neighbors version instead.")
 
 #include <raft/neighbors/ivf_flat_types.hpp>
 
diff --git a/cpp/include/raft/spatial/knn/ivf_pq.cuh b/cpp/include/raft/spatial/knn/ivf_pq.cuh
index 0f175f41bb..a89968bd80 100644
--- a/cpp/include/raft/spatial/knn/ivf_pq.cuh
+++ b/cpp/include/raft/spatial/knn/ivf_pq.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft::neighbors version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft::neighbors version instead.")
 
 #include <raft/neighbors/ivf_pq.cuh>
 
diff --git a/cpp/include/raft/spatial/knn/ivf_pq_types.hpp b/cpp/include/raft/spatial/knn/ivf_pq_types.hpp
index 83fb78eb46..168a75034f 100644
--- a/cpp/include/raft/spatial/knn/ivf_pq_types.hpp
+++ b/cpp/include/raft/spatial/knn/ivf_pq_types.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,9 +24,9 @@
 
 #pragma once
 
-#pragma message(__FILE__                                                  \
-                " is deprecated and will be removed in a future release." \
-                " Please use the raft::neighbors version instead.")
+#pragma message(__FILE__                                                    \
+                  " is deprecated and will be removed in a future release." \
+                  " Please use the raft::neighbors version instead.")
 
 #include <raft/neighbors/ivf_pq_types.hpp>
 
diff --git a/cpp/include/raft/spectral/detail/lapack.hpp b/cpp/include/raft/spectral/detail/lapack.hpp
index 1bc930baf4..2d9e5ae9a4 100644
--- a/cpp/include/raft/spectral/detail/lapack.hpp
+++ b/cpp/include/raft/spectral/detail/lapack.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -473,33 +473,33 @@ void Lapack<T>::gemm(bool transa,
                      int ldc)
 {
   // check_lapack_enabled();
-  //#ifdef NVGRAPH_USE_LAPACK
+  // #ifdef NVGRAPH_USE_LAPACK
   const char transA_char = transa ? 'T' : 'N';
   const char transB_char = transb ? 'T' : 'N';
   lapack_gemm(transA_char, transB_char, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
-  //#endif
+  // #endif
 }
 
 template <typename T>
 void Lapack<T>::sterf(int n, T* d, T* e)
 {
   //    check_lapack_enabled();
-  //#ifdef NVGRAPH_USE_LAPACK
+  // #ifdef NVGRAPH_USE_LAPACK
   int info;
   lapack_sterf(n, d, e, &info);
   lapackCheckError(info);
-  //#endif
+  // #endif
 }
 
 template <typename T>
 void Lapack<T>::steqr(char compz, int n, T* d, T* e, T* z, int ldz, T* work)
 {
   //    check_lapack_enabled();
-  //#ifdef NVGRAPH_USE_LAPACK
+  // #ifdef NVGRAPH_USE_LAPACK
   int info;
   lapack_steqr(compz, n, d, e, z, ldz, work, &info);
   lapackCheckError(info);
-  //#endif
+  // #endif
 }
 
 template <typename T>
diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh
index 87a1b8f686..2500a48bfb 100644
--- a/cpp/include/raft/stats/adjusted_rand_index.cuh
+++ b/cpp/include/raft/stats/adjusted_rand_index.cuh
@@ -82,7 +82,7 @@ double adjusted_rand_index(raft::device_resources const& handle,
 
 /** @} */  // end group stats_adj_rand_index
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh
index 17ff658ac8..10e1753423 100644
--- a/cpp/include/raft/stats/completeness_score.cuh
+++ b/cpp/include/raft/stats/completeness_score.cuh
@@ -84,7 +84,7 @@ double completeness_score(raft::device_resources const& handle,
 
 /** @} */  // end group stats_completeness
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh
index c0c387e067..f58061ba72 100644
--- a/cpp/include/raft/stats/cov.cuh
+++ b/cpp/include/raft/stats/cov.cuh
@@ -115,7 +115,7 @@ void cov(raft::device_resources const& handle,
 
 /** @} */  // end group stats_cov
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/detail/minmax.cuh b/cpp/include/raft/stats/detail/minmax.cuh
index 1ccd725189..4edafe82c8 100644
--- a/cpp/include/raft/stats/detail/minmax.cuh
+++ b/cpp/include/raft/stats/detail/minmax.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,8 +36,7 @@ constexpr To bit_cast(const From& from) noexcept
 }
 
 template <typename T>
-struct encode_traits {
-};
+struct encode_traits {};
 
 template <>
 struct encode_traits<float> {
diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh
index d59dc8e37a..05f08f52a4 100644
--- a/cpp/include/raft/stats/entropy.cuh
+++ b/cpp/include/raft/stats/entropy.cuh
@@ -79,7 +79,7 @@ double entropy(raft::device_resources const& handle,
 
 /** @} */  // end group stats_entropy
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh
index f829b0317e..d97d0759a0 100644
--- a/cpp/include/raft/stats/histogram.cuh
+++ b/cpp/include/raft/stats/histogram.cuh
@@ -114,7 +114,7 @@ void histogram(raft::device_resources const& handle,
 
 /** @} */  // end group stats_histogram
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh
index 173d63e47e..ca6c1ddf8e 100644
--- a/cpp/include/raft/stats/homogeneity_score.cuh
+++ b/cpp/include/raft/stats/homogeneity_score.cuh
@@ -87,7 +87,7 @@ double homogeneity_score(raft::device_resources const& handle,
 
 /** @} */  // end group stats_homogeneity_score
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh
index d27f736255..f19cc0d90d 100644
--- a/cpp/include/raft/stats/kl_divergence.cuh
+++ b/cpp/include/raft/stats/kl_divergence.cuh
@@ -73,7 +73,7 @@ value_t kl_divergence(raft::device_resources const& handle,
 
 /** @} */  // end group kl_divergence
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh
index a576e63bee..303700e80d 100644
--- a/cpp/include/raft/stats/mean.cuh
+++ b/cpp/include/raft/stats/mean.cuh
@@ -92,7 +92,7 @@ void mean(raft::device_resources const& handle,
 
 /** @} */  // end group stats_mean
 
-};  // namespace stats
-};  // namespace raft
+};         // namespace stats
+};         // namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh
index b333b3c8da..2f1deb7467 100644
--- a/cpp/include/raft/stats/mean_center.cuh
+++ b/cpp/include/raft/stats/mean_center.cuh
@@ -159,7 +159,7 @@ void mean_add(raft::device_resources const& handle,
 
 /** @} */  // end group stats_mean_center
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh
index 0ee21d1325..bad85f4260 100644
--- a/cpp/include/raft/stats/meanvar.cuh
+++ b/cpp/include/raft/stats/meanvar.cuh
@@ -106,6 +106,6 @@ void meanvar(raft::device_resources const& handle,
 
 /** @} */  // end group stats_mean_var
 
-};  // namespace raft::stats
+};         // namespace raft::stats
 
 #endif
diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh
index 8af4f7a92c..10f1ea163b 100644
--- a/cpp/include/raft/stats/minmax.cuh
+++ b/cpp/include/raft/stats/minmax.cuh
@@ -138,6 +138,6 @@ void minmax(raft::device_resources const& handle,
 
 /** @} */  // end group stats_minmax
 
-};  // namespace stats
-};  // namespace raft
+};         // namespace stats
+};         // namespace raft
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh
index ca7f33d398..be30bcd7fc 100644
--- a/cpp/include/raft/stats/mutual_info_score.cuh
+++ b/cpp/include/raft/stats/mutual_info_score.cuh
@@ -85,7 +85,7 @@ double mutual_info_score(raft::device_resources const& handle,
 
 /** @} */  // end group stats_mutual_info
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh
index 25b92e4e10..f0b37592e4 100644
--- a/cpp/include/raft/stats/rand_index.cuh
+++ b/cpp/include/raft/stats/rand_index.cuh
@@ -71,7 +71,7 @@ double rand_index(raft::device_resources const& handle,
 
 /** @} */  // end group stats_rand_index
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh
index 0b038c85ea..7b0cc6cbe0 100644
--- a/cpp/include/raft/stats/stddev.cuh
+++ b/cpp/include/raft/stats/stddev.cuh
@@ -181,7 +181,7 @@ void vars(raft::device_resources const& handle,
 
 /** @} */  // end group stats_variance
 
-};  // namespace stats
-};  // namespace raft
+};         // namespace stats
+};         // namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh
index 5f169b3384..d033dc8892 100644
--- a/cpp/include/raft/stats/sum.cuh
+++ b/cpp/include/raft/stats/sum.cuh
@@ -84,7 +84,7 @@ void sum(raft::device_resources const& handle,
 
 /** @} */  // end group stats_sum
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh
index be1d83d59d..948dd0a6ef 100644
--- a/cpp/include/raft/stats/v_measure.cuh
+++ b/cpp/include/raft/stats/v_measure.cuh
@@ -91,7 +91,7 @@ double v_measure(raft::device_resources const& handle,
 
 /** @} */  // end group stats_vmeasure
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh
index 7f061e0b45..273adf1641 100644
--- a/cpp/include/raft/stats/weighted_mean.cuh
+++ b/cpp/include/raft/stats/weighted_mean.cuh
@@ -185,7 +185,7 @@ void col_weighted_mean(raft::device_resources const& handle,
 
 /** @} */  // end group stats_weighted_mean
 
-};  // end namespace stats
-};  // end namespace raft
+};         // end namespace stats
+};         // end namespace raft
 
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/util/bitonic_sort.cuh b/cpp/include/raft/util/bitonic_sort.cuh
index e34708e332..46670d39bd 100644
--- a/cpp/include/raft/util/bitonic_sort.cuh
+++ b/cpp/include/raft/util/bitonic_sort.cuh
@@ -99,10 +99,10 @@ class bitonic {
   {
   }
 
-  bitonic(bitonic const&) = delete;
-  bitonic(bitonic&&)      = delete;
+  bitonic(bitonic const&)                    = delete;
+  bitonic(bitonic&&)                         = delete;
   auto operator=(bitonic const&) -> bitonic& = delete;
-  auto operator=(bitonic&&) -> bitonic& = delete;
+  auto operator=(bitonic&&) -> bitonic&      = delete;
 
   /**
    * You can think of this function in two ways:
diff --git a/cpp/include/raft/util/cache.cuh b/cpp/include/raft/util/cache.cuh
index 77e3ed2d6d..11b1edee73 100644
--- a/cpp/include/raft/util/cache.cuh
+++ b/cpp/include/raft/util/cache.cuh
@@ -362,9 +362,9 @@ class Cache {
   int GetSize() const { return cached_keys.size(); }
 
  private:
-  int n_vec;         //!< Number of elements in a cached vector
-  float cache_size;  //!< in MiB
-  int n_cache_sets;  //!< number of cache sets
+  int n_vec;            //!< Number of elements in a cached vector
+  float cache_size;     //!< in MiB
+  int n_cache_sets;     //!< number of cache sets
 
   const int TPB = 256;  //!< threads per block for kernel launch
   int n_iter    = 0;    //!< Counter for time stamping cache operation
diff --git a/cpp/include/raft/util/cache_util.cuh b/cpp/include/raft/util/cache_util.cuh
index 413e7522b1..b20982473f 100644
--- a/cpp/include/raft/util/cache_util.cuh
+++ b/cpp/include/raft/util/cache_util.cuh
@@ -46,7 +46,7 @@ __global__ void get_vecs(
   const math_t* cache, int_t n_vec, const idx_t* cache_idx, int_t n, math_t* out)
 {
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  int row = tid % n_vec;  // row idx
+  int row = tid % n_vec;             // row idx
   if (tid < n_vec * n) {
     size_t out_col   = tid / n_vec;  // col idx
     size_t cache_col = cache_idx[out_col];
@@ -93,7 +93,7 @@ __global__ void store_vecs(const math_t* tile,
                            int n_cache_vecs)
 {
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  int row = tid % n_vec;  // row idx
+  int row = tid % n_vec;          // row idx
   if (tid < n_vec * n) {
     int tile_col  = tid / n_vec;  // col idx
     int data_col  = tile_idx ? tile_idx[tile_col] : tile_col;
@@ -359,7 +359,7 @@ __global__ void get_cache_idx(int* keys,
       cache_time[cidx] = time;  // update time stamp
       cache_idx[tid]   = cidx;  // exact cache idx
     } else {
-      cache_idx[tid] = sidx;  // assign cache set
+      cache_idx[tid] = sidx;    // assign cache set
     }
   }
 }
diff --git a/cpp/include/raft/util/integer_utils.hpp b/cpp/include/raft/util/integer_utils.hpp
index e85086df42..6faab5381c 100644
--- a/cpp/include/raft/util/integer_utils.hpp
+++ b/cpp/include/raft/util/integer_utils.hpp
@@ -189,12 +189,10 @@ constexpr inline auto absolute_value(T val) -> std::enable_if_t<!std::is_signed<
  * @{
  */
 template <typename From, typename To, typename = void>
-struct is_narrowing : std::true_type {
-};
+struct is_narrowing : std::true_type {};
 
 template <typename From, typename To>
-struct is_narrowing<From, To, std::void_t<decltype(To{std::declval<From>()})>> : std::false_type {
-};
+struct is_narrowing<From, To, std::void_t<decltype(To{std::declval<From>()})>> : std::false_type {};
 /** @} */
 
 /** Check whether the numeric conversion is narrowing */
diff --git a/cpp/include/raft/util/vectorized.cuh b/cpp/include/raft/util/vectorized.cuh
index 5356f6a153..8af2e498e9 100644
--- a/cpp/include/raft/util/vectorized.cuh
+++ b/cpp/include/raft/util/vectorized.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@
 namespace raft {
 
 template <typename math_, int VecLen>
-struct IOType {
-};
+struct IOType {};
 template <>
 struct IOType<bool, 1> {
   static_assert(sizeof(bool) == sizeof(int8_t), "IOType bool size assumption failed");
diff --git a/cpp/include/raft_runtime/neighbors/refine.hpp b/cpp/include/raft_runtime/neighbors/refine.hpp
index 0171259bbb..2c162c2faa 100644
--- a/cpp/include/raft_runtime/neighbors/refine.hpp
+++ b/cpp/include/raft_runtime/neighbors/refine.hpp
@@ -18,7 +18,7 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/device_resources.hpp>
-//#include <raft/core/host_mdspan.hpp>
+// #include <raft/core/host_mdspan.hpp>
 
 namespace raft::runtime::neighbors {
 
diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py
deleted file mode 100755
index 5e29a3c5f1..0000000000
--- a/cpp/scripts/run-clang-format.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-import sys
-import re
-import os
-import subprocess
-import argparse
-import tempfile
-
-
-EXPECTED_VERSION = "11.1.0"
-VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)")
-# NOTE: populate this list with more top-level dirs as we add more of them to
-#       to the raft repo
-DEFAULT_DIRS = ["cpp/bench",
-                "cpp/include",
-                "cpp/src",
-                "cpp/test"]
-
-
-def parse_args():
-    argparser = argparse.ArgumentParser("Runs clang-format on a project")
-    argparser.add_argument("-dstdir", type=str, default=None,
-                           help="Directory to store the temporary outputs of"
-                           " clang-format. If nothing is passed for this, then"
-                           " a temporary dir will be created using `mkdtemp`")
-    argparser.add_argument("-exe", type=str, default="clang-format",
-                           help="Path to clang-format exe")
-    argparser.add_argument("-inplace", default=False, action="store_true",
-                           help="Replace the source files itself.")
-    argparser.add_argument("-regex", type=str,
-                           default=r"[.](cu|cuh|h|hpp|cpp)$",
-                           help="Regex string to filter in sources")
-    argparser.add_argument("-ignore", type=str, default=r".*thirdparty.*$",
-                           help="Regex used to ignore files from matched list")
-    argparser.add_argument("-v", dest="verbose", action="store_true",
-                           help="Print verbose messages")
-    argparser.add_argument("dirs", type=str, nargs="*",
-                           help="List of dirs where to find sources")
-    args = argparser.parse_args()
-    args.regex_compiled = re.compile(args.regex)
-    args.ignore_compiled = re.compile(args.ignore)
-    if args.dstdir is None:
-        args.dstdir = tempfile.mkdtemp()
-    ret = subprocess.check_output("%s --version" % args.exe, shell=True)
-    ret = ret.decode("utf-8")
-    version = VERSION_REGEX.search(ret)
-    if version is None:
-        raise Exception("Failed to figure out clang-format version!")
-    version = version.group(1)
-    if version != EXPECTED_VERSION:
-        raise Exception("clang-format exe must be v%s found '%s'" % \
-                        (EXPECTED_VERSION, version))
-    if len(args.dirs) == 0:
-        args.dirs = DEFAULT_DIRS
-    return args
-
-
-def list_all_src_files(file_regex, ignore_regex, srcdirs, dstdir, inplace):
-    allFiles = []
-    for srcdir in srcdirs:
-        for root, dirs, files in os.walk(srcdir):
-            for f in files:
-                if re.search(file_regex, f):
-                    src = os.path.join(root, f)
-                    if re.search(ignore_regex, src):
-                        continue
-                    if inplace:
-                        _dir = root
-                    else:
-                        _dir = os.path.join(dstdir, root)
-                    dst = os.path.join(_dir, f)
-                    allFiles.append((src, dst))
-    return allFiles
-
-
-def run_clang_format(src, dst, exe, verbose):
-    dstdir = os.path.dirname(dst)
-    if not os.path.exists(dstdir):
-        os.makedirs(dstdir)
-    # run the clang format command itself
-    if src == dst:
-        cmd = "%s -i %s" % (exe, src)
-    else:
-        cmd = "%s %s > %s" % (exe, src, dst)
-    try:
-        subprocess.check_call(cmd, shell=True)
-    except subprocess.CalledProcessError:
-        print("Failed to run clang-format! Maybe your env is not proper?")
-        raise
-    # run the diff to check if there are any formatting issues
-    cmd = "diff -q %s %s >/dev/null" % (src, dst)
-    try:
-        subprocess.check_call(cmd, shell=True)
-        if verbose:
-            print("%s passed" % os.path.basename(src))
-    except subprocess.CalledProcessError:
-        print("%s failed! 'diff %s %s' will show formatting violations!" % \
-              (os.path.basename(src), src, dst))
-        return False
-    return True
-
-
-def main():
-    args = parse_args()
-    # Attempt to making sure that we run this script from root of repo always
-    if not os.path.exists(".git"):
-        print("Error!! This needs to always be run from the root of repo")
-        sys.exit(-1)
-    all_files = list_all_src_files(args.regex_compiled, args.ignore_compiled,
-                                   args.dirs, args.dstdir, args.inplace)
-    # actual format checker
-    status = True
-    for src, dst in all_files:
-        if not run_clang_format(src, dst, args.exe, args.verbose):
-            status = False
-    if not status:
-        print("clang-format failed! You have 2 options:")
-        print(" 1. Look at formatting differences above and fix them manually")
-        print(" 2. Or run the below command to bulk-fix all these at once")
-        print("Bulk-fix command: ")
-        print("  python cpp/scripts/run-clang-format.py %s -inplace" % \
-              " ".join(sys.argv[1:]))
-        sys.exit(-1)
-    return
-
-
-if __name__ == "__main__":
-    main()
diff --git a/cpp/test/core/mdarray.cu b/cpp/test/core/mdarray.cu
index 85d7bdb6c8..aab7979c0e 100644
--- a/cpp/test/core/mdarray.cu
+++ b/cpp/test/core/mdarray.cu
@@ -821,7 +821,7 @@ void test_mdspan_aligned_matrix()
   // manually aligning the above, using -1 as filler
   static constexpr int X = -1;
   long data_padded[]     = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  X, X, X, X, X, X,
-                        10, 11, 12, 13, 14, 15, 16, 17, 18, 19, X, X, X, X, X, X};
+                            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, X, X, X, X, X, X};
 
   auto my_aligned_host_span =
     make_host_aligned_matrix_view<long, int, layout_right_padded<long>>(data_padded, rows, cols);
diff --git a/cpp/test/core/mdspan_utils.cu b/cpp/test/core/mdspan_utils.cu
index 526442da95..5e479b839f 100644
--- a/cpp/test/core/mdspan_utils.cu
+++ b/cpp/test/core/mdspan_utils.cu
@@ -30,8 +30,7 @@ template <typename ElementType,
           typename LayoutPolicy   = layout_c_contiguous,
           typename AccessorPolicy = stdex::default_accessor<ElementType>>
 struct derived_device_mdspan
-  : public device_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy> {
-};
+  : public device_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy> {};
 
 void test_template_asserts()
 {
diff --git a/cpp/test/distance/dist_canberra.cu b/cpp/test/distance/dist_canberra.cu
index db5555d9c8..9b8b6c016b 100644
--- a/cpp/test/distance/dist_canberra.cu
+++ b/cpp/test/distance/dist_canberra.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,8 +21,7 @@ namespace raft {
 namespace distance {
 
 template <typename DataType>
-class DistanceCanberra : public DistanceTest<raft::distance::DistanceType::Canberra, DataType> {
-};
+class DistanceCanberra : public DistanceTest<raft::distance::DistanceType::Canberra, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -64,8 +63,7 @@ TEST_P(DistanceCanberraD, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCanberraD, ::testing::ValuesIn(inputsd));
 
-class BigMatrixCanberra : public BigMatrixDistanceTest<raft::distance::DistanceType::Canberra> {
-};
+class BigMatrixCanberra : public BigMatrixDistanceTest<raft::distance::DistanceType::Canberra> {};
 TEST_F(BigMatrixCanberra, Result) {}
 
 }  // end namespace distance
diff --git a/cpp/test/distance/dist_correlation.cu b/cpp/test/distance/dist_correlation.cu
index 0e3f0ee0b5..fc729dec1c 100644
--- a/cpp/test/distance/dist_correlation.cu
+++ b/cpp/test/distance/dist_correlation.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceCorrelation
-  : public DistanceTest<raft::distance::DistanceType::CorrelationExpanded, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::CorrelationExpanded, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -66,8 +65,7 @@ TEST_P(DistanceCorrelationD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixCorrelation
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::CorrelationExpanded> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::CorrelationExpanded> {};
 TEST_F(BigMatrixCorrelation, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/dist_cos.cu b/cpp/test/distance/dist_cos.cu
index 9faf7651f7..9e1cf5af17 100644
--- a/cpp/test/distance/dist_cos.cu
+++ b/cpp/test/distance/dist_cos.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -64,8 +64,7 @@ TEST_P(DistanceExpCosD, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpCosD, ::testing::ValuesIn(inputsd));
 
-class BigMatrixCos : public BigMatrixDistanceTest<raft::distance::DistanceType::CosineExpanded> {
-};
+class BigMatrixCos : public BigMatrixDistanceTest<raft::distance::DistanceType::CosineExpanded> {};
 TEST_F(BigMatrixCos, Result) {}
 
 }  // end namespace distance
diff --git a/cpp/test/distance/dist_hamming.cu b/cpp/test/distance/dist_hamming.cu
index 1eef9fba4e..9529ec2eaa 100644
--- a/cpp/test/distance/dist_hamming.cu
+++ b/cpp/test/distance/dist_hamming.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceHamming
-  : public DistanceTest<raft::distance::DistanceType::HammingUnexpanded, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::HammingUnexpanded, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -66,8 +65,7 @@ TEST_P(DistanceHammingD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHammingD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixHamming
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::HammingUnexpanded> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::HammingUnexpanded> {};
 TEST_F(BigMatrixHamming, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/dist_hellinger.cu b/cpp/test/distance/dist_hellinger.cu
index 85a157aa31..93d6101a18 100644
--- a/cpp/test/distance/dist_hellinger.cu
+++ b/cpp/test/distance/dist_hellinger.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceHellingerExp
-  : public DistanceTest<raft::distance::DistanceType::HellingerExpanded, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::HellingerExpanded, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -66,8 +65,7 @@ TEST_P(DistanceHellingerExpD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHellingerExpD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixHellingerExp
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::HellingerExpanded> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::HellingerExpanded> {};
 TEST_F(BigMatrixHellingerExp, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/dist_inner_product.cu b/cpp/test/distance/dist_inner_product.cu
index 68ce4c841a..8dd7ef0874 100644
--- a/cpp/test/distance/dist_inner_product.cu
+++ b/cpp/test/distance/dist_inner_product.cu
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceInnerProduct
-  : public DistanceTest<raft::distance::DistanceType::InnerProduct, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::InnerProduct, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 10, 5, 32, true, 1234ULL},
@@ -68,8 +67,7 @@ TEST_P(DistanceInnerProductD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceInnerProductD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixInnerProduct
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::InnerProduct> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::InnerProduct> {};
 TEST_F(BigMatrixInnerProduct, Result) {}
 
 }  // end namespace distance
diff --git a/cpp/test/distance/dist_jensen_shannon.cu b/cpp/test/distance/dist_jensen_shannon.cu
index a1e2f9f38c..e0e256c925 100644
--- a/cpp/test/distance/dist_jensen_shannon.cu
+++ b/cpp/test/distance/dist_jensen_shannon.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceJensenShannon
-  : public DistanceTest<raft::distance::DistanceType::JensenShannon, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::JensenShannon, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -66,8 +65,7 @@ TEST_P(DistanceJensenShannonD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceJensenShannonD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixJensenShannon
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::JensenShannon> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::JensenShannon> {};
 TEST_F(BigMatrixJensenShannon, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/dist_kl_divergence.cu b/cpp/test/distance/dist_kl_divergence.cu
index 94330d9450..1f79ebcad4 100644
--- a/cpp/test/distance/dist_kl_divergence.cu
+++ b/cpp/test/distance/dist_kl_divergence.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceKLDivergence
-  : public DistanceTest<raft::distance::DistanceType::KLDivergence, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::KLDivergence, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -66,8 +65,7 @@ TEST_P(DistanceKLDivergenceD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceKLDivergenceD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixKLDivergence
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::KLDivergence> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::KLDivergence> {};
 TEST_F(BigMatrixKLDivergence, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/dist_l1.cu b/cpp/test/distance/dist_l1.cu
index dc6bcf72b7..ce62a4aeec 100644
--- a/cpp/test/distance/dist_l1.cu
+++ b/cpp/test/distance/dist_l1.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,8 +21,7 @@ namespace raft {
 namespace distance {
 
 template <typename DataType>
-class DistanceUnexpL1 : public DistanceTest<raft::distance::DistanceType::L1, DataType> {
-};
+class DistanceUnexpL1 : public DistanceTest<raft::distance::DistanceType::L1, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -64,8 +63,7 @@ TEST_P(DistanceUnexpL1D, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceUnexpL1D, ::testing::ValuesIn(inputsd));
 
-class BigMatrixUnexpL1 : public BigMatrixDistanceTest<raft::distance::DistanceType::L1> {
-};
+class BigMatrixUnexpL1 : public BigMatrixDistanceTest<raft::distance::DistanceType::L1> {};
 TEST_F(BigMatrixUnexpL1, Result) {}
 
 }  // end namespace distance
diff --git a/cpp/test/distance/dist_l2_exp.cu b/cpp/test/distance/dist_l2_exp.cu
index ae67215e51..6b6a290386 100644
--- a/cpp/test/distance/dist_l2_exp.cu
+++ b/cpp/test/distance/dist_l2_exp.cu
@@ -67,8 +67,7 @@ TEST_P(DistanceEucExpTestD, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucExpTestD, ::testing::ValuesIn(inputsd));
 
-class BigMatrixEucExp : public BigMatrixDistanceTest<raft::distance::DistanceType::L2Expanded> {
-};
+class BigMatrixEucExp : public BigMatrixDistanceTest<raft::distance::DistanceType::L2Expanded> {};
 TEST_F(BigMatrixEucExp, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/dist_l2_sqrt_exp.cu b/cpp/test/distance/dist_l2_sqrt_exp.cu
index 94d254f44b..5bccabcc3f 100644
--- a/cpp/test/distance/dist_l2_sqrt_exp.cu
+++ b/cpp/test/distance/dist_l2_sqrt_exp.cu
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceEucSqrtExpTest
-  : public DistanceTest<raft::distance::DistanceType::L2SqrtExpanded, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::L2SqrtExpanded, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 2048, 4096, 128, true, 1234ULL},
@@ -69,8 +68,7 @@ TEST_P(DistanceEucSqrtExpTestD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucSqrtExpTestD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixEucSqrtExp
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::L2SqrtExpanded> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::L2SqrtExpanded> {};
 TEST_F(BigMatrixEucSqrtExp, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/dist_l2_unexp.cu b/cpp/test/distance/dist_l2_unexp.cu
index d74a41d2a4..19b0ff6dbf 100644
--- a/cpp/test/distance/dist_l2_unexp.cu
+++ b/cpp/test/distance/dist_l2_unexp.cu
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceEucUnexpTest
-  : public DistanceTest<raft::distance::DistanceType::L2Unexpanded, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::L2Unexpanded, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
diff --git a/cpp/test/distance/dist_l_inf.cu b/cpp/test/distance/dist_l_inf.cu
index b9d6413a10..223d186a8d 100644
--- a/cpp/test/distance/dist_l_inf.cu
+++ b/cpp/test/distance/dist_l_inf.cu
@@ -21,8 +21,7 @@ namespace raft {
 namespace distance {
 
 template <typename DataType>
-class DistanceLinf : public DistanceTest<raft::distance::DistanceType::Linf, DataType> {
-};
+class DistanceLinf : public DistanceTest<raft::distance::DistanceType::Linf, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -64,8 +63,7 @@ TEST_P(DistanceLinfD, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLinfD, ::testing::ValuesIn(inputsd));
 
-class BigMatrixLinf : public BigMatrixDistanceTest<raft::distance::DistanceType::Linf> {
-};
+class BigMatrixLinf : public BigMatrixDistanceTest<raft::distance::DistanceType::Linf> {};
 TEST_F(BigMatrixLinf, Result) {}
 
 }  // end namespace distance
diff --git a/cpp/test/distance/dist_russell_rao.cu b/cpp/test/distance/dist_russell_rao.cu
index 3c5124c31f..73cf4b33a4 100644
--- a/cpp/test/distance/dist_russell_rao.cu
+++ b/cpp/test/distance/dist_russell_rao.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@ namespace distance {
 
 template <typename DataType>
 class DistanceRussellRao
-  : public DistanceTest<raft::distance::DistanceType::RusselRaoExpanded, DataType> {
-};
+  : public DistanceTest<raft::distance::DistanceType::RusselRaoExpanded, DataType> {};
 
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -66,8 +65,7 @@ TEST_P(DistanceRussellRaoD, Result)
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceRussellRaoD, ::testing::ValuesIn(inputsd));
 
 class BigMatrixRussellRao
-  : public BigMatrixDistanceTest<raft::distance::DistanceType::RusselRaoExpanded> {
-};
+  : public BigMatrixDistanceTest<raft::distance::DistanceType::RusselRaoExpanded> {};
 TEST_F(BigMatrixRussellRao, Result) {}
 }  // end namespace distance
 }  // end namespace raft
diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh
index 438e212fbd..b8c35461b1 100644
--- a/cpp/test/distance/distance_base.cuh
+++ b/cpp/test/distance/distance_base.cuh
@@ -16,14 +16,14 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
-#include <raft/common/nvtx.hpp>  // common::nvtx::range
+#include <raft/common/nvtx.hpp>              // common::nvtx::range
 
 #include <raft/core/device_mdspan.hpp>       // make_device_matrix_view
 #include <raft/core/device_resources.hpp>    // raft::device_resources
 #include <raft/core/operators.hpp>           // raft::sqrt
 #include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
 #include <raft/random/rng.cuh>
-#include <rmm/device_uvector.hpp>  // rmm::device_uvector
+#include <rmm/device_uvector.hpp>            // rmm::device_uvector
 
 // When the distance library is precompiled, include only the raft_runtime
 // headers. This way, a small change in one of the kernel internals does not
diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu
index ba2572b5a9..48a077aa26 100644
--- a/cpp/test/linalg/rsvd.cu
+++ b/cpp/test/linalg/rsvd.cu
@@ -159,24 +159,24 @@ class RsvdTest : public ::testing::TestWithParam<RsvdInputs<T>> {
 
 const std::vector<RsvdInputs<float>> inputs_fx = {
   // Test with ratios
-  {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},   // Square + BBT
-  {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},  // Tall + BBT
+  {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},      // Square + BBT
+  {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},     // Tall + BBT
 
-  {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL},   // Square + non-BBT
-  {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL},  // Tall + non-BBT
+  {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL},     // Square + non-BBT
+  {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL},    // Tall + non-BBT
 
-  {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},   // Square + BBT
-  {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},  // Tall + BBT
+  {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},    // Square + BBT
+  {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL},   // Tall + BBT
 
-  {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL},  // Square + non-BBT
-  {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}  // Tall + non-BBT
+  {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL},   // Square + non-BBT
+  {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}   // Tall + non-BBT
 
-  ,                                                              // Test with fixed ranks
-  {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL},   // Square + BBT
-  {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL},  // Tall + BBT
+  ,                                                                // Test with fixed ranks
+  {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL},     // Square + BBT
+  {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL},    // Tall + BBT
 
-  {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL},   // Square + non-BBT
-  {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL},  // Tall + non-BBT
+  {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL},    // Square + non-BBT
+  {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL},   // Tall + non-BBT
 
   {0.60f, 2048, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL},   // Square + BBT
   {1.00f, 16384, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL},  // Tall + BBT
@@ -187,14 +187,14 @@ const std::vector<RsvdInputs<float>> inputs_fx = {
 
 const std::vector<RsvdInputs<double>> inputs_dx = {
   // Test with ratios
-  {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},     // Square + BBT
-  {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},    // Tall + BBT
-  {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL},    // Square + non-BBT
-  {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL},   // Tall + non-BBT
-  {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},   // Square + BBT
-  {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},  // Tall + BBT
-  {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL},  // Square + non-BBT
-  {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}  // Tall + non-BBT
+  {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},      // Square + BBT
+  {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},     // Tall + BBT
+  {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL},     // Square + non-BBT
+  {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL},    // Tall + non-BBT
+  {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},    // Square + BBT
+  {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL},   // Tall + BBT
+  {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL},   // Square + non-BBT
+  {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}   // Tall + non-BBT
 
   ,                                                             // Test with fixed ranks
   {0.10, 256, 256, 0.25f, 0.0, 0.0, 100, 5, true, 4321ULL},     // Square + BBT
diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh
index 385e9a80c0..8b8aa21fc9 100644
--- a/cpp/test/neighbors/ann_cagra.cuh
+++ b/cpp/test/neighbors/ann_cagra.cuh
@@ -221,7 +221,7 @@ inline std::vector<AnnCagraInputs> generate_inputs()
     {100},
     {1000},
     {8},
-    {1, 16, 33},  // k
+    {1, 16, 33},   // k
     {search_algo::SINGLE_CTA, search_algo::MULTI_KERNEL},
     {1, 10, 100},  // query size
     {0},
diff --git a/cpp/test/neighbors/knn.cu b/cpp/test/neighbors/knn.cu
index bcd4b9cb0b..edac73b073 100644
--- a/cpp/test/neighbors/knn.cu
+++ b/cpp/test/neighbors/knn.cu
@@ -80,11 +80,11 @@ class KNNTest : public ::testing::TestWithParam<KNNInputs> {
  protected:
   void testBruteForce()
   {
-    //#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG)
+    // #if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG)
     raft::print_device_vector("Input array: ", input_.data(), rows_ * cols_, std::cout);
     std::cout << "K: " << k_ << std::endl;
     raft::print_device_vector("Labels array: ", search_labels_.data(), rows_, std::cout);
-    //#endif
+    // #endif
 
     std::vector<device_matrix_view<const T, IdxT, row_major>> index = {
       make_device_matrix_view((const T*)(input_.data()), rows_, cols_)};
diff --git a/cpp/test/sparse/spgemmi.cu b/cpp/test/sparse/spgemmi.cu
index ec77b8e88b..e0aa4bc43b 100644
--- a/cpp/test/sparse/spgemmi.cu
+++ b/cpp/test/sparse/spgemmi.cu
@@ -63,20 +63,20 @@ class SPGemmiTest : public ::testing::TestWithParam<SPGemmiInputs> {
     int hB_rows[]       = {0, 2, 3, 1, 0, 2, 3, 1, 3};
     float hB_values[]   = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
     float hA[]          = {1.0f,
-                  2.0f,
-                  3.0f,
-                  4.0f,
-                  5.0f,
-                  6.0f,
-                  7.0f,
-                  8.0f,
-                  9.0f,
-                  10.0f,
-                  11.0f,
-                  12.0f,
-                  13.0f,
-                  14.0f,
-                  15.0f};
+                           2.0f,
+                           3.0f,
+                           4.0f,
+                           5.0f,
+                           6.0f,
+                           7.0f,
+                           8.0f,
+                           9.0f,
+                           10.0f,
+                           11.0f,
+                           12.0f,
+                           13.0f,
+                           14.0f,
+                           15.0f};
     std::vector<float> hC(C_size);
     std::vector<float> hC_expected{23, 26, 29, 32,  35,  24, 28, 32, 36, 40,
                                    71, 82, 93, 104, 115, 48, 56, 64, 72, 80};
diff --git a/cpp/test/util/bitonic_sort.cu b/cpp/test/util/bitonic_sort.cu
index 2cf5420334..d1f03f78b5 100644
--- a/cpp/test/util/bitonic_sort.cu
+++ b/cpp/test/util/bitonic_sort.cu
@@ -103,12 +103,12 @@ struct bitonic_launch {
 };
 
 template <typename T>
-class BitonicTest : public testing::TestWithParam<test_spec> {  // NOLINT
+class BitonicTest : public testing::TestWithParam<test_spec> {     // NOLINT
  protected:
-  const test_spec spec;  // NOLINT
-  std::vector<T> in;     // NOLINT
-  std::vector<T> out;    // NOLINT
-  std::vector<T> ref;    // NOLINT
+  const test_spec spec;                                            // NOLINT
+  std::vector<T> in;                                               // NOLINT
+  std::vector<T> out;                                              // NOLINT
+  std::vector<T> ref;                                              // NOLINT
 
   void segmented_sort(std::vector<T>& vec, int k, bool ascending)  // NOLINT
   {
@@ -184,13 +184,13 @@ auto inputs = ::testing::Values(test_spec{1, 1, 1, true},
                                 test_spec{70, 1, 64, true},
                                 test_spec{70, 2, 128, false});
 
-using Floats = BitonicTest<float>;                     // NOLINT
-TEST_P(Floats, Run) { run(); }                         // NOLINT
-INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs);  // NOLINT
+using Floats = BitonicTest<float>;                      // NOLINT
+TEST_P(Floats, Run) { run(); }                          // NOLINT
+INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs);   // NOLINT
 
-using Ints = BitonicTest<int>;                       // NOLINT
-TEST_P(Ints, Run) { run(); }                         // NOLINT
-INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs);  // NOLINT
+using Ints = BitonicTest<int>;                          // NOLINT
+TEST_P(Ints, Run) { run(); }                            // NOLINT
+INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs);     // NOLINT
 
 using Doubles = BitonicTest<double>;                    // NOLINT
 TEST_P(Doubles, Run) { run(); }                         // NOLINT
diff --git a/dependencies.yaml b/dependencies.yaml
index f3e0cd1167..1bc50d5dd4 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -143,10 +143,10 @@ dependencies:
     common:
       - output_types: [conda, requirements]
         packages:
-          - clang=11.1.0
+          - clang=16.0.1
       - output_types: [conda]
         packages:
-          - clang-tools=11.1.0
+          - clang-tools=16.0.1
   nn_bench:
     common:
       - output_types: [conda]
diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md
index 6f57453e28..99712fc996 100644
--- a/docs/source/developer_guide.md
+++ b/docs/source/developer_guide.md
@@ -14,7 +14,7 @@ Developing features and fixing bugs for the RAFT library itself is straightforwa
 
 The process for working on a CUDA/C++ feature which might span RAFT and one or more consuming libraries can vary slightly depending on whether the consuming project relies on a source build (as outlined in the [BUILD](BUILD.md#install_header_only_cpp) docs). In such a case, the option `CPM_raft_SOURCE=/path/to/raft/source` can be passed to the cmake of the consuming project in order to build the local RAFT from source. The PR with relevant changes to the consuming project can also pin the RAFT version temporarily by explicitly changing the `FORK` and `PINNED_TAG` arguments to the RAFT branch containing their changes when invoking `find_and_configure_raft`.  The pin should be reverted after the changed is merged to the RAFT project and before it is merged to the dependent project(s) downstream.
 
-If building a feature which spans projects and not using the source build in cmake, the RAFT changes (both C++ and Python) will need to be installed into the environment of the consuming project before they can be used. The ideal integration of RAFT into consuming projects will enable both the source build in the consuming project only for this case but also rely on a more stable packaging (such as conda packaging) otherwise. 
+If building a feature which spans projects and not using the source build in cmake, the RAFT changes (both C++ and Python) will need to be installed into the environment of the consuming project before they can be used. The ideal integration of RAFT into consuming projects will enable the source build in the consuming project only for this case, and otherwise rely on a more stable packaging (such as conda packaging).
 
 
 ## Threading Model
@@ -95,12 +95,12 @@ template <typename value_t, typename idx_t>
 class ivf_pq {
   ivf_pq_params params_;
   raft::resources const& res_;
-  
+
 public:
   ivf_pq(raft::resources const& res);
   void train(raft::device_matrix<value_t, idx_t, raft::row_major> dataset);
-  void search(raft::device_matrix<value_t, idx_t, raft::row_major> queries, 
-              raft::device_matrix<value_t, idx_t, raft::row_major> out_inds, 
+  void search(raft::device_matrix<value_t, idx_t, raft::row_major> queries,
+              raft::device_matrix<value_t, idx_t, raft::row_major> out_inds,
               raft::device_matrix<value_t, idx_t, raft::row_major> out_dists);
 };
 ```
@@ -134,46 +134,77 @@ namespace raft::ivf_pq {
 
 ## Coding style
 
-### Code format
-#### Introduction
-RAFT relies on `clang-format` to enforce code style across all C++ and CUDA source code. The coding style is based on the [Google style guide](https://google.github.io/styleguide/cppguide.html#Formatting). The only digressions from this style are the following.
-1. Do not split empty functions/records/namespaces.
-2. Two-space indentation everywhere, including the line continuations.
-3. Disable reflowing of comments.
-   The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/.clang-format).
+### Code Formatting
+
+#### Using pre-commit hooks
+
+RAFT uses [pre-commit](https://pre-commit.com/) to execute all code linters and formatters. These
+tools ensure a consistent code format throughout the project. Using pre-commit ensures that linter
+versions and options are aligned for all developers. Additionally, there is a CI check in place to
+enforce that committed code follows our standards.
+
+To use `pre-commit`, install via `conda` or `pip`:
+
+```bash
+conda install -c conda-forge pre-commit
+```
+
+```bash
+pip install pre-commit
+```
+
+Then run pre-commit hooks before committing code:
+
+```bash
+pre-commit run
+```
+
+By default, pre-commit runs on staged files (only changes and additions that will be committed).
+To run pre-commit checks on all files, execute:
 
-#### How is the check done?
-All formatting checks are done by this python script: [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) which is effectively a wrapper over `clang-format`. An error is raised if the code diverges from the format suggested by clang-format. It is expected that the developers run this script to detect and fix formatting violations before creating PR.
+```bash
+pre-commit run --all-files
+```
 
-##### As part of CI
-[run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) is executed as part of our `ci/checks/style.sh` CI test. If there are any formatting violations, PR author is expected to fix those to get CI passing. Steps needed to fix the formatting violations are described in the subsequent sub-section.
+Optionally, you may set up the pre-commit hooks to run automatically when you make a git commit. This can be done by running:
 
-##### Manually
-Developers can also manually (or setup this command as part of git pre-commit hook) run this check by executing:
 ```bash
-python ./cpp/scripts/run-clang-format.py
+pre-commit install
 ```
-From the root of the RAFT repository.
 
-#### How to know the formatting violations?
-When there are formatting errors, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) prints a `diff` command, showing where there are formatting differences. Unfortunately, unlike `flake8`, `clang-format` does NOT print descriptions of the violations, but instead directly formats the code. So, the only way currently to know about formatting differences is to run the diff command as suggested by this script against each violating source file.
+Now code linters and formatters will be run each time you commit changes.
 
-#### How to fix the formatting violations?
-When there are formatting violations, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) prints at the end, the exact command that can be run by developers to fix them. This is the easiest way to fix formatting errors. [This screencast](https://asciinema.org/a/287367) shows how developers can check for formatting violations in their branches and also how to fix those, before sending out PRs.
+You can skip these checks with `git commit --no-verify` or with the short version `git commit -n`.
+
+#### Summary of pre-commit hooks
+
+The following section describes some of the core pre-commit hooks used by the repository.
+See `.pre-commit-config.yaml` for a full list.
+
+C++/CUDA is formatted with [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html).
+
+RAFT relies on `clang-format` to enforce code style across all C++ and CUDA source code. The coding style is based on the [Google style guide](https://google.github.io/styleguide/cppguide.html#Formatting). The only digressions from this style are the following.
+1. Do not split empty functions/records/namespaces.
+2. Two-space indentation everywhere, including the line continuations.
+3. Disable reflowing of comments.
+   The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/.clang-format).
+
+[`doxygen`](https://doxygen.nl/) is used as a documentation generator and also as a documentation linter.
+In order to run doxygen as a linter on C++/CUDA code, run
 
-In short, to bulk-fix all the formatting violations, execute the following command:
 ```bash
-python ./cpp/scripts/run-clang-format.py -inplace
+./ci/checks/doxygen.sh
 ```
-From the root of the RAFT repository.
 
-#### clang-format version?
-To avoid spurious code style violations we specify the exact clang-format version required, currently `11.1.0`. This is enforced by the [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) script itself. Refer [here](../build#build-dependencies) for the list of build-time dependencies.
+Python code is checked by several linters, including [Black](https://black.readthedocs.io/en/stable/),
+[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/).
 
-#### Additional scripts
-Along with clang, there are an include checker and copyright checker scripts for checking style, which can be performed as part of CI, as well as manually.
+RAFT also uses [codespell](https://github.com/codespell-project/codespell) to find spelling
+mistakes, and this check is run as a pre-commit hook. To apply the suggested spelling fixes,
+you can run `codespell -i 3 -w .` from the repository root directory.
+This will bring up an interactive prompt to select which spelling fixes to apply.
 
-##### #include style
+### #include style
 [include_checker.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/include_checker.py) is used to enforce the include style as follows:
 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies.
 2. `#include <...>` should be used for referencing everything else
@@ -183,7 +214,7 @@ Manually, run the following to bulk-fix include style issues:
 python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list of folders which you want to fix]
 ```
 
-##### Copyright header
+### Copyright header
 [copyright.py](https://github.com/rapidsai/raft/blob/branch-23.06/ci/checks/copyright.py) checks the Copyright header for all git-modified files
 
 Manually, you can run the following to bulk-fix the header if only the years need to be updated:
@@ -252,9 +283,9 @@ Sometimes, we need to temporarily change the log pattern (eg: for reporting deci
 
 ## Common Design Considerations
 
-1. Use the `hpp` extension for files which can be compiled with `gcc` against the CUDA-runtime. Use the `cuh` extension for files which require `nvcc` to be compiled. `hpp` can also be used for functions marked `__host__ __device__` only if proper checks are in place to remove the `__device__` designation when not compiling with `nvcc`. 
+1. Use the `hpp` extension for files which can be compiled with `gcc` against the CUDA-runtime. Use the `cuh` extension for files which require `nvcc` to be compiled. `hpp` can also be used for functions marked `__host__ __device__` only if proper checks are in place to remove the `__device__` designation when not compiling with `nvcc`.
 
-2. When additional classes, structs, or general POCO types are needed to be used for representing data in the public API, place them in a new file called `<primitive_name>_types.hpp`. This tells users they are safe to expose these types on their own public APIs without bringing in device code. At a minimum, the definitions for these types, at least, should not require `nvcc`. In general, these classes should only store very simple state and should not perform their own computations. Instead, new functions should be exposed on the public API which accept these objects, reading or updating their state as necessary. 
+2. When additional classes, structs, or general POCO types are needed to represent data in the public API, place them in a new file called `<primitive_name>_types.hpp`. This tells users they are safe to expose these types on their own public APIs without bringing in device code. At a minimum, the definitions for these types should not require `nvcc`. In general, these classes should only store very simple state and should not perform their own computations. Instead, new functions should be exposed on the public API which accept these objects, reading or updating their state as necessary.
 
 3. Documentation for public APIs should be well documented, easy to use, and it is highly preferred that they include usage instructions.
 
@@ -262,7 +293,7 @@ Sometimes, we need to temporarily change the log pattern (eg: for reporting deci
 
 ## Testing
 
-It's important for RAFT to maintain a high test coverage of the public APIs in order to minimize the potential for downstream projects to encounter unexpected build or runtime behavior as a result of changes. 
+It's important for RAFT to maintain a high test coverage of the public APIs in order to minimize the potential for downstream projects to encounter unexpected build or runtime behavior as a result of changes.
 
 A well-defined public API can help maintain compile-time stability but means more focus should be placed on testing the functional requirements and verifying execution on the various edge cases within RAFT itself. Ideally, bug fixes and new features should be able to be made to RAFT independently of the consuming projects.
 
@@ -292,9 +323,9 @@ void foo(const double* srcdata, double* result)
     CUDA_RT_CALL( cudaStreamCreate( &stream ) );
     raft::resources res;
     set_cuda_stream(res, stream);
-    
+
     ...
-    
+
     RAFT_CUDA_TRY( cudaMemcpyAsync( srcdata, h_srcdata.data(), n*sizeof(double), cudaMemcpyHostToDevice, stream ) );
 
     raft::algo(raft::resources, dopredict, srcdata, result, ... );
@@ -348,7 +379,7 @@ void foo(const raft::resources& h, ...)
 }
 ```
 
-The example below shows one way to create `n_stream` number of internal cuda streams with an `rmm::stream_pool` which can later be used by the algos inside RAFT. 
+The example below shows one way to create `n_stream` internal CUDA streams with an `rmm::stream_pool` which can later be used by the algos inside RAFT.
 ```cpp
 #include <raft/core/resources.hpp>
 #include <raft/core/resource/cuda_stream_pool.hpp>
diff --git a/thirdparty/pcg/pcg_basic.c b/thirdparty/pcg/pcg_basic.c
index 8c2fd0d22b..166663ea13 100644
--- a/thirdparty/pcg/pcg_basic.c
+++ b/thirdparty/pcg/pcg_basic.c
@@ -41,17 +41,14 @@ static pcg32_random_t pcg32_global = PCG32_INITIALIZER;
 
 void pcg32_srandom_r(pcg32_random_t* rng, uint64_t initstate, uint64_t initseq)
 {
-    rng->state = 0U;
-    rng->inc = (initseq << 1u) | 1u;
-    pcg32_random_r(rng);
-    rng->state += initstate;
-    pcg32_random_r(rng);
+  rng->state = 0U;
+  rng->inc   = (initseq << 1u) | 1u;
+  pcg32_random_r(rng);
+  rng->state += initstate;
+  pcg32_random_r(rng);
 }
 
-void pcg32_srandom(uint64_t seed, uint64_t seq)
-{
-    pcg32_srandom_r(&pcg32_global, seed, seq);
-}
+void pcg32_srandom(uint64_t seed, uint64_t seq) { pcg32_srandom_r(&pcg32_global, seed, seq); }
 
 // pcg32_random()
 // pcg32_random_r(rng)
@@ -59,18 +56,14 @@ void pcg32_srandom(uint64_t seed, uint64_t seq)
 
 uint32_t pcg32_random_r(pcg32_random_t* rng)
 {
-    uint64_t oldstate = rng->state;
-    rng->state = oldstate * 6364136223846793005ULL + rng->inc;
-    uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
-    uint32_t rot = oldstate >> 59u;
-    return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
-}
-
-uint32_t pcg32_random()
-{
-    return pcg32_random_r(&pcg32_global);
+  uint64_t oldstate   = rng->state;
+  rng->state          = oldstate * 6364136223846793005ULL + rng->inc;
+  uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
+  uint32_t rot        = oldstate >> 59u;
+  return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
 }
 
+uint32_t pcg32_random() { return pcg32_random_r(&pcg32_global); }
 
 // pcg32_boundedrand(bound):
 // pcg32_boundedrand_r(rng, bound):
@@ -78,39 +71,33 @@ uint32_t pcg32_random()
 
 uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound)
 {
-    // To avoid bias, we need to make the range of the RNG a multiple of
-    // bound, which we do by dropping output less than a threshold.
-    // A naive scheme to calculate the threshold would be to do
-    //
-    //     uint32_t threshold = 0x100000000ull % bound;
-    //
-    // but 64-bit div/mod is slower than 32-bit div/mod (especially on
-    // 32-bit platforms).  In essence, we do
-    //
-    //     uint32_t threshold = (0x100000000ull-bound) % bound;
-    //
-    // because this version will calculate the same modulus, but the LHS
-    // value is less than 2^32.
-
-    uint32_t threshold = -bound % bound;
-
-    // Uniformity guarantees that this loop will terminate.  In practice, it
-    // should usually terminate quickly; on average (assuming all bounds are
-    // equally likely), 82.25% of the time, we can expect it to require just
-    // one iteration.  In the worst case, someone passes a bound of 2^31 + 1
-    // (i.e., 2147483649), which invalidates almost 50% of the range.  In 
-    // practice, bounds are typically small and only a tiny amount of the range
-    // is eliminated.
-    for (;;) {
-        uint32_t r = pcg32_random_r(rng);
-        if (r >= threshold)
-            return r % bound;
-    }
-}
-
-
-uint32_t pcg32_boundedrand(uint32_t bound)
-{
-    return pcg32_boundedrand_r(&pcg32_global, bound);
+  // To avoid bias, we need to make the range of the RNG a multiple of
+  // bound, which we do by dropping output less than a threshold.
+  // A naive scheme to calculate the threshold would be to do
+  //
+  //     uint32_t threshold = 0x100000000ull % bound;
+  //
+  // but 64-bit div/mod is slower than 32-bit div/mod (especially on
+  // 32-bit platforms).  In essence, we do
+  //
+  //     uint32_t threshold = (0x100000000ull-bound) % bound;
+  //
+  // because this version will calculate the same modulus, but the LHS
+  // value is less than 2^32.
+
+  uint32_t threshold = -bound % bound;
+
+  // Uniformity guarantees that this loop will terminate.  In practice, it
+  // should usually terminate quickly; on average (assuming all bounds are
+  // equally likely), 82.25% of the time, we can expect it to require just
+  // one iteration.  In the worst case, someone passes a bound of 2^31 + 1
+  // (i.e., 2147483649), which invalidates almost 50% of the range.  In
+  // practice, bounds are typically small and only a tiny amount of the range
+  // is eliminated.
+  for (;;) {
+    uint32_t r = pcg32_random_r(rng);
+    if (r >= threshold) return r % bound;
+  }
 }
 
+uint32_t pcg32_boundedrand(uint32_t bound) { return pcg32_boundedrand_r(&pcg32_global, bound); }

From 1defcccd924e1a0fa21e5ace333af834c2b14511 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Malte=20F=C3=B6rster?=
 <97973773+mfoerste4@users.noreply.github.com>
Date: Tue, 25 Apr 2023 06:33:04 +0200
Subject: [PATCH 27/78] Gram matrix support for sparse input (#1296)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds sparse input support (CSR) for GramMatrix kernel computation. This is a requirement to enable SVM support for sparse input in [cuML issue 2197](https://github.com/rapidsai/cuml/issues/2197).

It also adds row-norm computation for CSR matrices, which is used for the expanded L2 norm computation within RBF kernels.

Although this branch introduces a new API, it remains backward compatible with the old GramMatrix API (which is now marked as deprecated).

CC @cjnolet @tfeher

Authors:
  - Malte Förster (https://github.com/mfoerste4)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1296
---
 cpp/bench/prims/distance/fused_l2_nn.cu       |   1 +
 cpp/include/raft/core/device_mdspan.hpp       |  30 +
 .../distance/detail/kernels/gram_matrix.cuh   | 448 ++++++++++++---
 .../detail/kernels/kernel_factory.cuh         |  28 +-
 .../detail/kernels/kernel_matrices.cuh        | 531 +++++++++++++++---
 .../raft/sparse/linalg/detail/norm.cuh        |  61 ++
 .../raft/sparse/linalg/detail/spmm.hpp        | 166 ++++++
 cpp/include/raft/sparse/linalg/norm.cuh       |  33 ++
 cpp/include/raft/sparse/linalg/spmm.cuh       |  78 +++
 cpp/test/CMakeLists.txt                       |  10 +-
 cpp/test/distance/gram.cu                     |  99 ++--
 cpp/test/distance/gram_base.cuh               |  87 +++
 cpp/test/sparse/gram.cu                       | 327 +++++++++++
 cpp/test/sparse/norm.cu                       | 123 ++--
 cpp/test/sparse/normalize.cu                  | 127 +++++
 15 files changed, 1856 insertions(+), 293 deletions(-)
 create mode 100644 cpp/include/raft/sparse/linalg/detail/spmm.hpp
 create mode 100644 cpp/include/raft/sparse/linalg/spmm.cuh
 create mode 100644 cpp/test/distance/gram_base.cuh
 create mode 100644 cpp/test/sparse/gram.cu
 create mode 100644 cpp/test/sparse/normalize.cu

diff --git a/cpp/bench/prims/distance/fused_l2_nn.cu b/cpp/bench/prims/distance/fused_l2_nn.cu
index 1c45572782..a5115407dd 100644
--- a/cpp/bench/prims/distance/fused_l2_nn.cu
+++ b/cpp/bench/prims/distance/fused_l2_nn.cu
@@ -16,6 +16,7 @@
 
 #include <common/benchmark.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
+#include <raft/linalg/norm.cuh>
 #include <raft/util/cudart_utils.hpp>
 #if defined RAFT_COMPILED
 #include <raft/distance/specializations.cuh>
diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp
index 7510c388fe..c1898a3f09 100644
--- a/cpp/include/raft/core/device_mdspan.hpp
+++ b/cpp/include/raft/core/device_mdspan.hpp
@@ -255,6 +255,36 @@ auto make_device_matrix_view(ElementType* ptr, IndexType n_rows, IndexType n_col
   return device_matrix_view<ElementType, IndexType, LayoutPolicy>{ptr, extents};
 }
 
+/**
+ * @brief Create a 2-dim mdspan instance for device pointer with a strided layout
+ *        that has stride 1 in the contiguous dimension (trailing for row-major,
+ *        leading for column-major). The given layout policy is expected to match
+ *        the layout of the underlying pointer.
+ * @tparam ElementType the data type of the matrix elements
+ * @tparam IndexType the index type of the extents
+ * @tparam LayoutPolicy policy for strides and layout ordering
+ * @param[in] ptr on device to wrap
+ * @param[in] n_rows number of rows in pointer
+ * @param[in] n_cols number of columns in pointer
+ * @param[in] stride leading dimension / stride of data (<= 0 selects the contiguous default)
+ */
+template <typename ElementType, typename IndexType, typename LayoutPolicy = layout_c_contiguous>
+auto make_device_strided_matrix_view(ElementType* ptr,
+                                     IndexType n_rows,
+                                     IndexType n_cols,
+                                     IndexType stride)
+{
+  constexpr auto is_row_major = std::is_same_v<LayoutPolicy, layout_c_contiguous>;
+  IndexType stride0           = is_row_major ? (stride > 0 ? stride : n_cols) : 1;
+  IndexType stride1           = is_row_major ? 1 : (stride > 0 ? stride : n_rows);
+
+  assert(is_row_major ? stride0 >= n_cols : stride1 >= n_rows);
+  matrix_extent<IndexType> extents{n_rows, n_cols};
+
+  auto layout = make_strided_layout(extents, std::array<IndexType, 2>{stride0, stride1});
+  return device_matrix_view<ElementType, IndexType, layout_stride>{ptr, layout};
+}
+
 /**
  * @brief Create a 1-dim mdspan instance for device pointer.
  * @tparam ElementType the data type of the vector elements
diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
index aaf3052892..a68b904470 100644
--- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
+++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
@@ -16,13 +16,26 @@
 
 #pragma once
 
+#include <raft/core/device_csr_matrix.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/distance/distance.cuh>
+#include <raft/distance/distance_types.hpp>
+//#include <raft/sparse/detail/cusparse_wrappers.h>
+#include <raft/sparse/distance/distance.cuh>
+#include <raft/sparse/linalg/spmm.cuh>
 
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/linalg/gemm.cuh>
 
 namespace raft::distance::kernels::detail {
 
+template <typename math_t>
+using dense_input_matrix_view_t = raft::device_matrix_view<const math_t, int, layout_stride>;
+template <typename math_t>
+using dense_output_matrix_view_t = raft::device_matrix_view<math_t, int, layout_stride>;
+template <typename math_t>
+using csr_input_matrix_view_t = raft::device_csr_matrix_view<const math_t, int, int, int>;
+
 /**
  * Base class for general Gram matrices
  * A Gram matrix is the Hermitian matrix of inner probucts G_ik = <x_i, x_k>
@@ -37,14 +50,135 @@ namespace raft::distance::kernels::detail {
  */
 template <typename math_t>
 class GramMatrixBase {
+ protected:
   cublasHandle_t cublas_handle;
+  bool legacy_interface;
 
  public:
-  GramMatrixBase(cublasHandle_t cublas_handle) : cublas_handle(cublas_handle){};
+  GramMatrixBase() : legacy_interface(false){};
+  [[deprecated]] GramMatrixBase(cublasHandle_t cublas_handle)
+    : cublas_handle(cublas_handle), legacy_interface(true){};
 
   virtual ~GramMatrixBase(){};
 
   /** Convenience function to evaluate the Gram matrix for two vector sets.
+   *  Vector sets are provided in Matrix format
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 dense device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
+   * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
+   */
+  void operator()(raft::device_resources const& handle,
+                  dense_input_matrix_view_t<math_t> x1,
+                  dense_input_matrix_view_t<math_t> x2,
+                  dense_output_matrix_view_t<math_t> out,
+                  math_t* norm_x1 = nullptr,
+                  math_t* norm_x2 = nullptr)
+  {
+    evaluate(handle, x1, x2, out, norm_x1, norm_x2);
+  }
+
+  /** Convenience function to evaluate the Gram matrix for two vector sets.
+   *  Vector sets are provided in Matrix format
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
+   * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
+   */
+  void operator()(raft::device_resources const& handle,
+                  csr_input_matrix_view_t<math_t> x1,
+                  dense_input_matrix_view_t<math_t> x2,
+                  dense_output_matrix_view_t<math_t> out,
+                  math_t* norm_x1 = nullptr,
+                  math_t* norm_x2 = nullptr)
+  {
+    evaluate(handle, x1, x2, out, norm_x1, norm_x2);
+  }
+
+  /** Convenience function to evaluate the Gram matrix for two vector sets.
+   *  Vector sets are provided in Matrix format
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 csr device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
+   * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
+   */
+  void operator()(raft::device_resources const& handle,
+                  csr_input_matrix_view_t<math_t> x1,
+                  csr_input_matrix_view_t<math_t> x2,
+                  dense_output_matrix_view_t<math_t> out,
+                  math_t* norm_x1 = nullptr,
+                  math_t* norm_x2 = nullptr)
+  {
+    evaluate(handle, x1, x2, out, norm_x1, norm_x2);
+  }
+
+  // unfortunately, 'evaluate' cannot be templatized as it needs to be virtual
+
+  /** Evaluate the Gram matrix for two vector sets using simple dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 dense device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused.
+   * @param norm_x2 unused.
+   */
+  virtual void evaluate(raft::device_resources const& handle,
+                        dense_input_matrix_view_t<math_t> x1,
+                        dense_input_matrix_view_t<math_t> x2,
+                        dense_output_matrix_view_t<math_t> out,
+                        math_t* norm_x1,
+                        math_t* norm_x2)
+  {
+    linear(handle, x1, x2, out);
+  }
+  /** Evaluate the Gram matrix for two vector sets using simple dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused.
+   * @param norm_x2 unused.
+   */
+  virtual void evaluate(raft::device_resources const& handle,
+                        csr_input_matrix_view_t<math_t> x1,
+                        dense_input_matrix_view_t<math_t> x2,
+                        dense_output_matrix_view_t<math_t> out,
+                        math_t* norm_x1,
+                        math_t* norm_x2)
+  {
+    linear(handle, x1, x2, out);
+  }
+  /** Evaluate the Gram matrix for two vector sets using simple dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 csr device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused.
+   * @param norm_x2 unused.
+   */
+  virtual void evaluate(raft::device_resources const& handle,
+                        csr_input_matrix_view_t<math_t> x1,
+                        csr_input_matrix_view_t<math_t> x2,
+                        dense_output_matrix_view_t<math_t> out,
+                        math_t* norm_x1,
+                        math_t* norm_x2)
+  {
+    linear(handle, x1, x2, out);
+  }
+
+  /** Evaluate the Gram matrix for two vector sets using simple dot product.
    *
    * @param [in] x1 device array of vectors, size [n1*n_cols]
    * @param [in] n1 number vectors in x1
@@ -55,29 +189,26 @@ class GramMatrixBase {
    * @param [in] is_row_major whether the input and output matrices are in row
    *        major format
    * @param [in] stream cuda stream
-   * @param ld1 leading dimension of x1
-   * @param ld2 leading dimension of x2
-   * @param ld_out leading dimension of out
+   * @param ld1 leading dimension of x1 (usually n_cols if row-major, else n1)
+   * @param ld2 leading dimension of x2 (usually n_cols if row-major, else n2)
+   * @param ld_out leading dimension of out (usually n2 if row-major, else n1)
    */
-  virtual void operator()(const math_t* x1,
-                          int n1,
-                          int n_cols,
-                          const math_t* x2,
-                          int n2,
-                          math_t* out,
-                          bool is_row_major,
-                          cudaStream_t stream,
-                          int ld1    = 0,
-                          int ld2    = 0,
-                          int ld_out = 0)
+  [[deprecated]] virtual void evaluate(const math_t* x1,
+                                       int n1,
+                                       int n_cols,
+                                       const math_t* x2,
+                                       int n2,
+                                       math_t* out,
+                                       bool is_row_major,
+                                       cudaStream_t stream,
+                                       int ld1,
+                                       int ld2,
+                                       int ld_out)
   {
-    if (ld1 <= 0) { ld1 = is_row_major ? n_cols : n1; }
-    if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; }
-    if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; }
-    evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out);
+    linear(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out);
   }
 
-  /** Evaluate the Gram matrix for two vector sets using simple dot product.
+  /** Convenience function to evaluate the Gram matrix for two vector sets.
    *
    * @param [in] x1 device array of vectors, size [n1*n_cols]
    * @param [in] n1 number vectors in x1
@@ -88,30 +219,30 @@ class GramMatrixBase {
    * @param [in] is_row_major whether the input and output matrices are in row
    *        major format
    * @param [in] stream cuda stream
-   * @param ld1 leading dimension of x1 (usually it is n1)
-   * @param ld2 leading dimension of x2 (usually it is n2)
-   * @param ld_out leading dimension of out (usually it is n1)
+   * @param ld1 leading dimension of x1
+   * @param ld2 leading dimension of x2
+   * @param ld_out leading dimension of out
    */
-  virtual void evaluate(const math_t* x1,
-                        int n1,
-                        int n_cols,
-                        const math_t* x2,
-                        int n2,
-                        math_t* out,
-                        bool is_row_major,
-                        cudaStream_t stream,
-                        int ld1,
-                        int ld2,
-                        int ld_out)
+  [[deprecated]] void operator()(const math_t* x1,
+                                 int n1,
+                                 int n_cols,
+                                 const math_t* x2,
+                                 int n2,
+                                 math_t* out,
+                                 bool is_row_major,
+                                 cudaStream_t stream,
+                                 int ld1    = 0,
+                                 int ld2    = 0,
+                                 int ld_out = 0)
   {
-    linear(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out);
+    ASSERT(legacy_interface, "Legacy interface can only be used with legacy ctor.");
+    if (ld1 <= 0) { ld1 = is_row_major ? n_cols : n1; }
+    if (ld2 <= 0) { ld2 = is_row_major ? n_cols : n2; }
+    if (ld_out <= 0) { ld_out = is_row_major ? n2 : n1; }
+    evaluate(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out);
   }
 
-  // private:
-  // The following methods should be private, they are kept public to avoid:
-  // "error: The enclosing parent function ("distance") for an extended
-  // __device__ lambda cannot have private or protected access within its class"
-
+ protected:
   /** Calculates the Gram matrix using simple dot product between vector sets.
    *
    * out = x1 * x2
@@ -131,17 +262,17 @@ class GramMatrixBase {
    * @param ld2 leading dimension of x2
    * @param ld_out leading dimension of out
    */
-  void linear(const math_t* x1,
-              int n1,
-              int n_cols,
-              const math_t* x2,
-              int n2,
-              math_t* out,
-              bool is_row_major,
-              cudaStream_t stream,
-              int ld1,
-              int ld2,
-              int ld_out)
+  [[deprecated]] void linear(const math_t* x1,
+                             int n1,
+                             int n_cols,
+                             const math_t* x2,
+                             int n2,
+                             math_t* out,
+                             bool is_row_major,
+                             cudaStream_t stream,
+                             int ld1,
+                             int ld2,
+                             int ld_out)
   {
     math_t alpha = 1.0;
     math_t beta  = 0.0;
@@ -182,37 +313,198 @@ class GramMatrixBase {
     }
   }
 
-  /** Calculates the Gram matrix using Euclidean distance.
+ protected:
+  bool get_is_row_major(dense_output_matrix_view_t<math_t> matrix)
+  {
+    return (matrix.stride(1) == 1);
+  }
+
+  bool get_is_row_major(dense_input_matrix_view_t<math_t> matrix)
+  {
+    return (matrix.stride(1) == 1);
+  }
+
+  bool get_is_col_major(dense_output_matrix_view_t<math_t> matrix)
+  {
+    return (matrix.stride(0) == 1);
+  }
+
+  bool get_is_col_major(dense_input_matrix_view_t<math_t> matrix)
+  {
+    return (matrix.stride(0) == 1);
+  }
+
+  /** Calculates the Gram matrix using simple dot product between vector sets.
+   *
+   * out = x1 * x2
    *
    * Can be used as a building block for more complex kernel functions.
    *
-   * @param [in] x1 device array of vectors, size [n1*n_cols]
-   * @param [in] n1 number vectors in x1
-   * @param [in] n_cols number of columns (features) in x1 and x2
-   * @param [in] x2 device array of vectors, size [n2*n_cols]
-   * @param [in] n2 number vectors in x2
-   * @param [out] out device buffer to store the Gram matrix, size [n1*n2]
-   * @param [in] is_row_major whether the input and output matrices are in row
-   *        major format
-   * @param [in] stream cuda stream
-   * @param ld1 leading dimension of x1
-   * @param ld2 leading dimension of x2
-   * @param ld_out leading dimension of out
+   * @param [in] handle raft handle
+   * @param [in] x1 dense device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
    */
-  virtual void distance(const math_t* x1,
-                        int n1,
-                        int n_cols,
-                        const math_t* x2,
-                        int n2,
-                        math_t* out,
-                        bool is_row_major,
-                        cudaStream_t stream,
-                        int ld1,
-                        int ld2,
-                        int ld_out)
-  {
-    raft::distance::distance<raft::distance::DistanceType::L2Unexpanded, math_t, math_t, math_t>(
-      raft::device_resources(stream), x1, x2, out, n1, n2, n_cols, is_row_major);
+  void linear(raft::device_resources const& handle,
+              dense_input_matrix_view_t<math_t> x1,
+              dense_input_matrix_view_t<math_t> x2,
+              dense_output_matrix_view_t<math_t> out)
+  {
+    // check is_row_major consistency
+    bool is_row_major = get_is_row_major(x1) && get_is_row_major(x2) && get_is_row_major(out);
+    bool is_col_major = get_is_col_major(x1) && get_is_col_major(x2) && get_is_col_major(out);
+    ASSERT(is_row_major || is_col_major,
+           "GramMatrix leading dimensions for x1, x2 and out do not match");
+
+    // check dimensions
+    int n1     = out.extent(0);
+    int n2     = out.extent(1);
+    int n_cols = x1.extent(1);
+    ASSERT(x1.extent(0) == n1, "GramMatrix input matrix dimensions for x1 and out do not match");
+    ASSERT(x2.extent(0) == n2, "GramMatrix input matrix dimensions for x2 and out do not match");
+    ASSERT(x2.extent(1) == n_cols, "GramMatrix input matrix dimensions for x1 and x2 do not match");
+
+    // extract major stride
+    int ld1    = is_row_major ? x1.stride(0) : x1.stride(1);
+    int ld2    = is_row_major ? x2.stride(0) : x2.stride(1);
+    int ld_out = is_row_major ? out.stride(0) : out.stride(1);
+
+    math_t alpha = 1.0;
+    math_t beta  = 0.0;
+    if (is_row_major) {
+      // #TODO: Use mdspan-based API when stride-capable
+      // https://github.com/rapidsai/raft/issues/875
+      raft::linalg::gemm(handle,
+                         true,
+                         false,
+                         n2,
+                         n1,
+                         n_cols,
+                         &alpha,
+                         x2.data_handle(),
+                         ld2,
+                         x1.data_handle(),
+                         ld1,
+                         &beta,
+                         out.data_handle(),
+                         ld_out,
+                         handle.get_stream());
+    } else {
+      // #TODO: Use mdspan-based API when stride-capable
+      // https://github.com/rapidsai/raft/issues/875
+      raft::linalg::gemm(handle,
+                         false,
+                         true,
+                         n1,
+                         n2,
+                         n_cols,
+                         &alpha,
+                         x1.data_handle(),
+                         ld1,
+                         x2.data_handle(),
+                         ld2,
+                         &beta,
+                         out.data_handle(),
+                         ld_out,
+                         handle.get_stream());
+    }
+  }
+
+  /** Calculates the Gram matrix using simple dot product between vector sets.
+   *
+   * out = x1 * x2
+   *
+   * Can be used as a building block for more complex kernel functions.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   */
+  void linear(raft::device_resources const& handle,
+              csr_input_matrix_view_t<math_t> x1,
+              dense_input_matrix_view_t<math_t> x2,
+              dense_output_matrix_view_t<math_t> out)
+  {
+    // check is_row_major consistency
+    bool is_row_major = get_is_row_major(x2) && get_is_row_major(out);
+    bool is_col_major = get_is_col_major(x2) && get_is_col_major(out);
+    ASSERT(is_row_major || is_col_major,
+           "GramMatrix leading dimensions for x2 and out do not match");
+
+    // check dimensions
+    auto x1_structure = x1.structure_view();
+    ASSERT(x1_structure.get_n_rows() == out.extent(0),
+           "GramMatrix input matrix dimensions for x1 and out do not match");
+    ASSERT(x2.extent(0) == out.extent(1),
+           "GramMatrix input matrix dimensions for x2 and out do not match");
+    ASSERT(x2.extent(1) == x1_structure.get_n_cols(),
+           "GramMatrix input matrix dimensions for x1 and x2 do not match");
+
+    math_t alpha = 1.0;
+    math_t beta  = 0.0;
+
+    raft::sparse::linalg::spmm(handle, false, true, &alpha, x1, x2, &beta, out);
+  }
+
+  /** Calculates the Gram matrix using simple dot product between vector sets.
+   *
+   * out = x1 * x2
+   *
+   * Can be used as a building block for more complex kernel functions.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 csr device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   */
+  void linear(raft::device_resources const& handle,
+              csr_input_matrix_view_t<math_t> x1,
+              csr_input_matrix_view_t<math_t> x2,
+              dense_output_matrix_view_t<math_t> out)
+  {
+    // determine the output layout and require a contiguous output (no custom ld_out)
+    bool is_row_major = get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    int minor_out     = is_row_major ? out.extent(1) : out.extent(0);
+    ASSERT(ld_out == minor_out, "Sparse linear Kernel distance does not support ld_out parameter");
+
+    auto x1_structure = x1.structure_view();
+    auto x2_structure = x2.structure_view();
+    raft::sparse::distance::distances_config_t<int, math_t> dist_config(handle);
+
+    // switch a,b based on is_row_major
+    if (!is_row_major) {
+      dist_config.a_nrows   = x2_structure.get_n_rows();
+      dist_config.a_ncols   = x2_structure.get_n_cols();
+      dist_config.a_nnz     = x2_structure.get_nnz();
+      dist_config.a_indptr  = const_cast<int*>(x2_structure.get_indptr().data());
+      dist_config.a_indices = const_cast<int*>(x2_structure.get_indices().data());
+      dist_config.a_data    = const_cast<math_t*>(x2.get_elements().data());
+      dist_config.b_nrows   = x1_structure.get_n_rows();
+      dist_config.b_ncols   = x1_structure.get_n_cols();
+      dist_config.b_nnz     = x1_structure.get_nnz();
+      dist_config.b_indptr  = const_cast<int*>(x1_structure.get_indptr().data());
+      dist_config.b_indices = const_cast<int*>(x1_structure.get_indices().data());
+      dist_config.b_data    = const_cast<math_t*>(x1.get_elements().data());
+    } else {
+      dist_config.a_nrows   = x1_structure.get_n_rows();
+      dist_config.a_ncols   = x1_structure.get_n_cols();
+      dist_config.a_nnz     = x1_structure.get_nnz();
+      dist_config.a_indptr  = const_cast<int*>(x1_structure.get_indptr().data());
+      dist_config.a_indices = const_cast<int*>(x1_structure.get_indices().data());
+      dist_config.a_data    = const_cast<math_t*>(x1.get_elements().data());
+      dist_config.b_nrows   = x2_structure.get_n_rows();
+      dist_config.b_ncols   = x2_structure.get_n_cols();
+      dist_config.b_nnz     = x2_structure.get_nnz();
+      dist_config.b_indptr  = const_cast<int*>(x2_structure.get_indptr().data());
+      dist_config.b_indices = const_cast<int*>(x2_structure.get_indices().data());
+      dist_config.b_data    = const_cast<math_t*>(x2.get_elements().data());
+    }
+
+    raft::sparse::distance::pairwiseDistance(
+      out.data_handle(), dist_config, raft::distance::DistanceType::InnerProduct, 0.0);
   }
 };
+
 };  // end namespace raft::distance::kernels::detail
diff --git a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh
index 1aa6809bcd..bb3ff1c2f5 100644
--- a/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh
+++ b/cpp/include/raft/distance/detail/kernels/kernel_factory.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,19 +26,35 @@ namespace raft::distance::kernels::detail {
 template <typename math_t>
 class KernelFactory {
  public:
-  static GramMatrixBase<math_t>* create(KernelParams params, cublasHandle_t cublas_handle)
+  static GramMatrixBase<math_t>* create(KernelParams params)
   {
     GramMatrixBase<math_t>* res;
     // KernelParams is not templated, we convert the parameters to math_t here:
     math_t coef0 = params.coef0;
     math_t gamma = params.gamma;
     switch (params.kernel) {
-      case LINEAR: res = new GramMatrixBase<math_t>(cublas_handle); break;
+      case LINEAR: res = new GramMatrixBase<math_t>(); break;
+      case POLYNOMIAL: res = new PolynomialKernel<math_t, int>(params.degree, gamma, coef0); break;
+      case TANH: res = new TanhKernel<math_t>(gamma, coef0); break;
+      case RBF: res = new RBFKernel<math_t>(gamma); break;
+      default: throw raft::exception("Kernel not implemented");
+    }
+    return res;
+  }
+
+  [[deprecated]] static GramMatrixBase<math_t>* create(KernelParams params, cublasHandle_t handle)
+  {
+    GramMatrixBase<math_t>* res;
+    // KernelParams is not templated, we convert the parameters to math_t here:
+    math_t coef0 = params.coef0;
+    math_t gamma = params.gamma;
+    switch (params.kernel) {
+      case LINEAR: res = new GramMatrixBase<math_t>(handle); break;
       case POLYNOMIAL:
-        res = new PolynomialKernel<math_t, int>(params.degree, gamma, coef0, cublas_handle);
+        res = new PolynomialKernel<math_t, int>(params.degree, gamma, coef0, handle);
         break;
-      case TANH: res = new TanhKernel<math_t>(gamma, coef0, cublas_handle); break;
-      case RBF: res = new RBFKernel<math_t>(gamma); break;
+      case TANH: res = new TanhKernel<math_t>(gamma, coef0, handle); break;
+      case RBF: res = new RBFKernel<math_t>(gamma, handle); break;
       default: throw raft::exception("Kernel not implemented");
     }
     return res;
diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
index d1465efdb0..4b000add21 100644
--- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
+++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
@@ -21,6 +21,7 @@
 
 #include <raft/distance/distance.cuh>
 #include <raft/linalg/gemm.cuh>
+#include <raft/sparse/linalg/norm.cuh>
 
 namespace raft::distance::kernels::detail {
 
@@ -100,6 +101,38 @@ __global__ void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t ga
     }
 }
 
+/** Epilogue function for the RBF kernel using the expanded squared distance.
+ *
+ * Calculates, in place, output_ij = exp(-gain * (norm_x_i + norm_y_j - 2*input_ij));
+ *
+ * Intended usage
+ *   - input is the product of two matrices X and Y: input_ij = sum_k X_ik * Y_jk
+ *   - norm_x_i = sum_k X_ik^2, the squared L2 norm of x_i, the i-th row of matrix X
+ *   - norm_y_j = sum_k Y_jk^2, the squared L2 norm of y_j, the j-th row of matrix Y
+ *   (with these inputs the exponent equals -gain * |x_i - y_j|^2)
+ * @param inout device buffer in column major format, size [ld * cols]; overwritten in place
+ * @param ld leading dimension of the inout buffer
+ * @param rows number of rows (rows <= ld); also the number of entries read from norm_x
+ * @param cols number of columns; also the number of entries read from norm_y
+ * @param norm_x squared L2 norms of X's rows, indexed by the row of inout
+ * @param norm_y squared L2 norms of Y's rows, indexed by the column of inout
+ * @param gain scaling factor applied to the squared distance inside the exponential
+ */
+template <typename math_t>
+__global__ void rbf_kernel_expanded(
+  math_t* inout, int ld, int rows, int cols, math_t* norm_x, math_t* norm_y, math_t gain)
+{
+  for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols;
+       tidy += blockDim.y * gridDim.y) {
+    math_t norm_y_val = norm_y[tidy];  // read once per column, reused for every row
+    for (size_t tidx = threadIdx.x + blockIdx.x * blockDim.x; tidx < rows;
+         tidx += blockDim.x * gridDim.x) {
+      inout[tidx + tidy * ld] =
+        exp(-1.0 * gain * (norm_x[tidx] + norm_y_val - inout[tidx + tidy * ld] * 2));
+    }
+  }
+}
+
 /**
  * Create a kernel matrix using polynomial kernel function.
  */
@@ -138,11 +171,69 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
    * @param exponent
    * @param gain
    * @param offset
-   * @param cublas_handle
    */
-  PolynomialKernel(exp_t exponent, math_t gain, math_t offset, cublasHandle_t cublas_handle)
-    : GramMatrixBase<math_t>(cublas_handle), exponent(exponent), gain(gain), offset(offset)
+  PolynomialKernel(exp_t exponent, math_t gain, math_t offset)
+    : GramMatrixBase<math_t>(), exponent(exponent), gain(gain), offset(offset)
+  {
+  }
+
+  [[deprecated]] PolynomialKernel(exp_t exponent, math_t gain, math_t offset, cublasHandle_t handle)
+    : GramMatrixBase<math_t>(handle), exponent(exponent), gain(gain), offset(offset)
+  {
+  }
+
+  /** Evaluate kernel matrix using polynomial kernel.
+   *
+   * output[i,k] = (gain*<x1_i, x2_k> + offset)^exponent,
+   * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
+   * in the x2 set, and < , > denotes dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 dense device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused.
+   * @param norm_x2 unused.
+   */
+  void evaluate(raft::device_resources const& handle,
+                dense_input_matrix_view_t<math_t> x1,
+                dense_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(
+      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+  }
+
+  /** Evaluate kernel matrix using polynomial kernel.
+   *
+   * output[i,k] = (gain*<x1_i, x2_k> + offset)^exponent,
+   * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
+   * in the x2 set, and < , > denotes dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused.
+   * @param norm_x2 unused.
+   */
+  void evaluate(raft::device_resources const& handle,
+                csr_input_matrix_view_t<math_t> x1,
+                dense_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
   {
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(
+      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
   }
 
   /** Evaluate kernel matrix using polynomial kernel.
@@ -150,32 +241,57 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
    * output[i,k] = (gain*<x1_i, x2_k> + offset)^exponent,
    * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
    * in the x2 set, and < , > denotes dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 csr device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused.
+   * @param norm_x2 unused.
+   */
+  void evaluate(raft::device_resources const& handle,
+                csr_input_matrix_view_t<math_t> x1,
+                csr_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(
+      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+  }
+
+  /** Evaluate the Gram matrix using the legacy interface.
    *
    * @param [in] x1 device array of vectors, size [n1*n_cols]
    * @param [in] n1 number vectors in x1
-   * @param [in] n_cols number of features in x1 and x2
-   * @param [in] x2 device array of vectors, size [n2*cols]
+   * @param [in] n_cols number of columns (features) in x1 and x2
+   * @param [in] x2 device array of vectors, size [n2*n_cols]
    * @param [in] n2 number vectors in x2
    * @param [out] out device buffer to store the Gram matrix, size [n1*n2]
    * @param [in] is_row_major whether the input and output matrices are in row
    *        major format
    * @param [in] stream cuda stream
-   * @param ld1 leading dimension of x1
-   * @param ld2 leading dimension of x2
-   * @param ld_out leading dimension of out
+   * @param ld1 leading dimension of x1 (usually it is n1)
+   * @param ld2 leading dimension of x2 (usually it is n2)
+   * @param ld_out leading dimension of out (usually it is n1)
    */
-  void evaluate(const math_t* x1,
-                int n1,
-                int n_cols,
-                const math_t* x2,
-                int n2,
-                math_t* out,
-                bool is_row_major,
-                cudaStream_t stream,
-                int ld1,
-                int ld2,
-                int ld_out)
+  [[deprecated]] void evaluate(const math_t* x1,
+                               int n1,
+                               int n_cols,
+                               const math_t* x2,
+                               int n2,
+                               math_t* out,
+                               bool is_row_major,
+                               cudaStream_t stream,
+                               int ld1,
+                               int ld2,
+                               int ld_out)
   {
+    ASSERT(GramMatrixBase<math_t>::legacy_interface,
+           "Legacy interface can only be used with legacy ctor.");
     GramMatrixBase<math_t>::linear(
       x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out);
     applyKernel(out, ld_out, n1, n2, is_row_major, stream);
@@ -216,10 +332,11 @@ class TanhKernel : public GramMatrixBase<math_t> {
    * @tparam math_t floating point type
    * @param gain
    * @param offset
-   * @param cublas_handle
    */
-  TanhKernel(math_t gain, math_t offset, cublasHandle_t cublas_handle)
-    : GramMatrixBase<math_t>(cublas_handle), gain(gain), offset(offset)
+  TanhKernel(math_t gain, math_t offset) : GramMatrixBase<math_t>(), gain(gain), offset(offset) {}
+
+  [[deprecated]] TanhKernel(math_t gain, math_t offset, cublasHandle_t handle)
+    : GramMatrixBase<math_t>(handle), gain(gain), offset(offset)
   {
   }
 
@@ -229,12 +346,87 @@ class TanhKernel : public GramMatrixBase<math_t> {
    * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
    * in the x2 set, and < , > denotes dot product.
    *
-   * @param [in] x1 device array of vectors,
-   *  size [n1*n_cols]
+   * @param [in] handle raft handle
+   * @param [in] x1 dense device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused.
+   * @param norm_x2 unused.
+   */
+  void evaluate(raft::device_resources const& handle,
+                dense_input_matrix_view_t<math_t> x1,
+                dense_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(
+      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+  }
+
+  /** Evaluate kernel matrix using tanh kernel.
+   *
+   * output_[i + k*n1] = tanh(gain*<x1_i, x2_k> + offset),
+   * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
+   * in the x2 set, and < , > denotes dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused by this kernel.
+   * @param norm_x2 unused by this kernel.
+   */
+  void evaluate(raft::device_resources const& handle,
+                csr_input_matrix_view_t<math_t> x1,
+                dense_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(
+      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+  }
+
+  /** Evaluate kernel matrix using tanh kernel.
+   *
+   * output_[i + k*n1] = tanh(gain*<x1_i, x2_k> + offset),
+   * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
+   * in the x2 set, and < , > denotes dot product.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 csr device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 unused by this kernel.
+   * @param norm_x2 unused by this kernel.
+   */
+  void evaluate(raft::device_resources const& handle,
+                csr_input_matrix_view_t<math_t> x1,
+                csr_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(
+      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+  }
+
+  /** Evaluate the Gram matrix using the legacy interface.
+   *
+   * @param [in] x1 device array of vectors, size [n1*n_cols]
    * @param [in] n1 number vectors in x1
-   * @param [in] n_cols number of features in x1 and x2
-   * @param [in] x2 device array of vectors,
-   *   size [n2*n_cols]
+   * @param [in] n_cols number of columns (features) in x1 and x2
+   * @param [in] x2 device array of vectors, size [n2*n_cols]
    * @param [in] n2 number vectors in x2
    * @param [out] out device buffer to store the Gram matrix, size [n1*n2]
    * @param [in] is_row_major whether the input and output matrices are in row
@@ -244,18 +436,20 @@ class TanhKernel : public GramMatrixBase<math_t> {
    * @param ld2 leading dimension of x2 (usually it is n2)
    * @param ld_out leading dimension of out (usually it is n1)
    */
-  void evaluate(const math_t* x1,
-                int n1,
-                int n_cols,
-                const math_t* x2,
-                int n2,
-                math_t* out,
-                bool is_row_major,
-                cudaStream_t stream,
-                int ld1,
-                int ld2,
-                int ld_out)
+  [[deprecated]] void evaluate(const math_t* x1,
+                               int n1,
+                               int n_cols,
+                               const math_t* x2,
+                               int n2,
+                               math_t* out,
+                               bool is_row_major,
+                               cudaStream_t stream,
+                               int ld1,
+                               int ld2,
+                               int ld_out)
   {
+    ASSERT(GramMatrixBase<math_t>::legacy_interface,
+           "Legacy interface can only be used with legacy ctor.");
     GramMatrixBase<math_t>::linear(
       x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out);
     applyKernel(out, ld_out, n1, n2, is_row_major, stream);
@@ -269,21 +463,23 @@ template <typename math_t>
 class RBFKernel : public GramMatrixBase<math_t> {
   math_t gain;
 
-  void applyKernel(
-    math_t* inout, int ld, int rows, int cols, bool is_row_major, cudaStream_t stream)
+  void applyKernel(math_t* inout,
+                   int ld,
+                   int rows,
+                   int cols,
+                   math_t* norm_x1,
+                   math_t* norm_x2,
+                   bool is_row_major,
+                   cudaStream_t stream)
   {
-    const int n_minor = is_row_major ? cols : rows;
-    if (ld == n_minor) {
-      rbf_kernel_nopad<<<raft::ceildiv<size_t>((size_t)rows * cols, 128), 128, 0, stream>>>(
-        inout, rows * cols, gain);
-    } else {
-      int n1 = is_row_major ? cols : rows;
-      int n2 = is_row_major ? rows : cols;
-      rbf_kernel<<<dim3(raft::ceildiv(n1, 32), raft::ceildiv(n2, 4), 1),
-                   dim3(32, 4, 1),
-                   0,
-                   stream>>>(inout, ld, n1, n2, gain);
-    }
+    int n1          = is_row_major ? cols : rows;
+    int n2          = is_row_major ? rows : cols;
+    math_t* norm_n1 = is_row_major ? norm_x2 : norm_x1;
+    math_t* norm_n2 = is_row_major ? norm_x1 : norm_x2;
+    rbf_kernel_expanded<<<dim3(raft::ceildiv(n1, 32), raft::ceildiv(n2, 4), 1),
+                          dim3(32, 4, 1),
+                          0,
+                          stream>>>(inout, ld, n1, n2, norm_n1, norm_n2, gain);
   }
 
  public:
@@ -295,61 +491,230 @@ class RBFKernel : public GramMatrixBase<math_t> {
    * @tparam math_t floating point type
    * @param gain
    */
-  RBFKernel(math_t gain) : GramMatrixBase<math_t>(NULL), gain(gain) {}
+  RBFKernel(math_t gain) : GramMatrixBase<math_t>(), gain(gain) {}
+
+  [[deprecated]] RBFKernel(math_t gain, cublasHandle_t handle)
+    : GramMatrixBase<math_t>(handle), gain(gain)
+  {
+  }
+
+  void matrixRowNormL2(raft::device_resources const& handle,
+                       dense_input_matrix_view_t<math_t> matrix,
+                       math_t* target)
+  {
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(matrix);
+    int minor         = is_row_major ? matrix.extent(1) : matrix.extent(0);
+    int ld            = is_row_major ? matrix.stride(0) : matrix.stride(1);
+    ASSERT(ld == minor, "RBF Kernel lazy rowNorm compute does not support ld parameter");
+    raft::linalg::rowNorm(target,
+                          matrix.data_handle(),
+                          matrix.extent(1),
+                          matrix.extent(0),
+                          raft::linalg::NormType::L2Norm,
+                          is_row_major,
+                          handle.get_stream());
+  }
+
+  void matrixRowNormL2(raft::device_resources const& handle,
+                       csr_input_matrix_view_t<math_t> matrix,
+                       math_t* target)
+  {
+    auto matrix_structure = matrix.structure_view();
+    raft::sparse::linalg::rowNormCsr(handle,
+                                     matrix_structure.get_indptr().data(),
+                                     matrix.get_elements().data(),
+                                     matrix_structure.get_nnz(),
+                                     matrix_structure.get_n_rows(),
+                                     target,
+                                     raft::linalg::NormType::L2Norm);
+  }
 
   /** Evaluate kernel matrix using RBF kernel.
    *
    * output_[i + k*n1] = exp(-gain*|x1_i - x2_k|^2),
    * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
    * in the x2 set, and | | euclidean distance.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 dense device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 squared L2 norms of x1's rows (n1 values), or nullptr to compute them here.
+   * @param norm_x2 squared L2 norms of x2's rows (n2 values), or nullptr to compute them here.
+   */
+  void evaluate(raft::device_resources const& handle,
+                dense_input_matrix_view_t<math_t> x1,
+                dense_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    cudaStream_t stream = handle.get_stream();
+
+    // row norms the caller did not supply (nullptr) are computed into these temporaries
+    rmm::device_uvector<math_t> tmp_norm_x1(0, stream);
+    rmm::device_uvector<math_t> tmp_norm_x2(0, stream);
+    if (norm_x1 == nullptr) {
+      tmp_norm_x1.reserve(x1.extent(0), stream);
+      norm_x1 = tmp_norm_x1.data();
+      matrixRowNormL2(handle, x1, norm_x1);
+    }
+    if (norm_x2 == nullptr) {
+      tmp_norm_x2.reserve(x2.extent(0), stream);
+      norm_x2 = tmp_norm_x2.data();
+      matrixRowNormL2(handle, x2, norm_x2);
+    }
+
+    // expanded form: |x1_i - x2_k|^2 = |x1_i|^2 + |x2_k|^2 - 2 * <x1_i, x2_k>
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                norm_x1,
+                norm_x2,
+                is_row_major,
+                handle.get_stream());
+  }
+
+  /** Evaluate kernel matrix using RBF kernel.
+   *
+   * output_[i + k*n1] = exp(-gain*|x1_i - x2_k|^2),
+   * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
+   * in the x2 set, and | | euclidean distance.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 dense device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 squared L2 norms of x1's rows (n1 values), or nullptr to compute them here.
+   * @param norm_x2 squared L2 norms of x2's rows (n2 values), or nullptr to compute them here.
+   */
+  void evaluate(raft::device_resources const& handle,
+                csr_input_matrix_view_t<math_t> x1,
+                dense_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    cudaStream_t stream = handle.get_stream();
+
+    // row norms the caller did not supply (nullptr) are computed into these temporaries
+    rmm::device_uvector<math_t> tmp_norm_x1(0, stream);
+    rmm::device_uvector<math_t> tmp_norm_x2(0, stream);
+    if (norm_x1 == nullptr) {
+      tmp_norm_x1.reserve(x1.structure_view().get_n_rows(), stream);
+      norm_x1 = tmp_norm_x1.data();
+      matrixRowNormL2(handle, x1, norm_x1);
+    }
+    if (norm_x2 == nullptr) {
+      tmp_norm_x2.reserve(x2.extent(0), stream);
+      norm_x2 = tmp_norm_x2.data();
+      matrixRowNormL2(handle, x2, norm_x2);
+    }
+
+    // expanded form: |x1_i - x2_k|^2 = |x1_i|^2 + |x2_k|^2 - 2 * <x1_i, x2_k>
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                norm_x1,
+                norm_x2,
+                is_row_major,
+                handle.get_stream());
+  }
+
+  /** Evaluate kernel matrix using RBF kernel.
+   *
+   * output_[i + k*n1] = exp(-gain*|x1_i - x2_k|^2),
+   * where x1_i is the i-th vector from the x1 set, and x2_k is k-th vector
+   * in the x2 set, and | | euclidean distance.
+   *
+   * @param [in] handle raft handle
+   * @param [in] x1 csr device matrix view, size [n1*n_cols]
+   * @param [in] x2 csr device matrix view, size [n2*n_cols]
+   * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
+   * @param norm_x1 squared L2 norms of x1's rows (n1 values), or nullptr to compute them here.
+   * @param norm_x2 squared L2 norms of x2's rows (n2 values), or nullptr to compute them here.
+   */
+  void evaluate(raft::device_resources const& handle,
+                csr_input_matrix_view_t<math_t> x1,
+                csr_input_matrix_view_t<math_t> x2,
+                dense_output_matrix_view_t<math_t> out,
+                math_t* norm_x1,
+                math_t* norm_x2)
+  {
+    cudaStream_t stream = handle.get_stream();
+
+    // row norms the caller did not supply (nullptr) are computed into these temporaries
+    rmm::device_uvector<math_t> tmp_norm_x1(0, stream);
+    rmm::device_uvector<math_t> tmp_norm_x2(0, stream);
+    if (norm_x1 == nullptr) {
+      tmp_norm_x1.reserve(x1.structure_view().get_n_rows(), stream);
+      norm_x1 = tmp_norm_x1.data();
+      matrixRowNormL2(handle, x1, norm_x1);
+    }
+    if (norm_x2 == nullptr) {
+      tmp_norm_x2.reserve(x2.structure_view().get_n_rows(), stream);
+      norm_x2 = tmp_norm_x2.data();
+      matrixRowNormL2(handle, x2, norm_x2);
+    }
+
+    // expanded form: |x1_i - x2_k|^2 = |x1_i|^2 + |x2_k|^2 - 2 * <x1_i, x2_k>
+    bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
+    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
+    GramMatrixBase<math_t>::linear(handle, x1, x2, out);
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                norm_x1,
+                norm_x2,
+                is_row_major,
+                handle.get_stream());
+  }
+
+  /** Evaluate the Gram matrix using the legacy interface.
    *
    * @param [in] x1 device array of vectors, size [n1*n_cols]
    * @param [in] n1 number vectors in x1
-   * @param [in] n_cols number of features in x1 and x2
+   * @param [in] n_cols number of columns (features) in x1 and x2
    * @param [in] x2 device array of vectors, size [n2*n_cols]
    * @param [in] n2 number vectors in x2
    * @param [out] out device buffer to store the Gram matrix, size [n1*n2]
    * @param [in] is_row_major whether the input and output matrices are in row
    *        major format
    * @param [in] stream cuda stream
-   * @param ld1 leading dimension of x1, currently only ld1 == n1 is supported
-   * @param ld2 leading dimension of x2, currently only ld2 == n2 is supported
-   * @param ld_out leading dimension of out, only ld_out == n1 is supported
+   * @param ld1 leading dimension of x1; must equal n_cols (row major) or n1 (column major)
+   * @param ld2 leading dimension of x2; must equal n_cols (row major) or n2 (column major)
+   * @param ld_out leading dimension of out; must equal n2 (row major) or n1 (column major)
    */
-  void evaluate(const math_t* x1,
-                int n1,
-                int n_cols,
-                const math_t* x2,
-                int n2,
-                math_t* out,
-                bool is_row_major,
-                cudaStream_t stream,
-                int ld1,
-                int ld2,
-                int ld_out)
+  [[deprecated]] void evaluate(const math_t* x1,
+                               int n1,
+                               int n_cols,
+                               const math_t* x2,
+                               int n2,
+                               math_t* out,
+                               bool is_row_major,
+                               cudaStream_t stream,
+                               int ld1,
+                               int ld2,
+                               int ld_out)
   {
+    ASSERT(GramMatrixBase<math_t>::legacy_interface,
+           "Legacy interface can only be used with legacy ctor.");
     int minor1    = is_row_major ? n_cols : n1;
     int minor2    = is_row_major ? n_cols : n2;
     int minor_out = is_row_major ? n2 : n1;
     ASSERT(ld1 == minor1, "RBF Kernel distance does not support ld1 parameter");
     ASSERT(ld2 == minor2, "RBF Kernel distance does not support ld2 parameter");
     ASSERT(ld_out == minor_out, "RBF Kernel distance does not support ld_out parameter");
-    distance(x1, n1, n_cols, x2, n2, out, is_row_major, stream, ld1, ld2, ld_out);
-  }
 
-  /** Customize distance function withe RBF epilogue */
-  void distance(const math_t* x1,
-                int n1,
-                int n_cols,
-                const math_t* x2,
-                int n2,
-                math_t* out,
-                bool is_row_major,
-                cudaStream_t stream,
-                int ld1,
-                int ld2,
-                int ld_out)
-  {
     math_t gain   = this->gain;
     using index_t = int64_t;
 
diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh
index a5767be736..3cb4a3e353 100644
--- a/cpp/include/raft/sparse/linalg/detail/norm.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh
@@ -17,10 +17,15 @@
 #pragma once
 
 #include <cusparse_v2.h>
+#include <raft/common/nvtx.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/linalg/norm_types.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
+#include <raft/sparse/op/row_op.cuh>
+
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
 
@@ -170,6 +175,62 @@ void csr_row_normalize_max(const int* ia,  // csr row ind array (sorted by row)
   RAFT_CUDA_TRY(cudaGetLastError());
 }
 
+template <typename Type,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void csr_row_op_wrapper(const IdxType* ia,
+                        const Type* data,
+                        IdxType nnz,
+                        IdxType N,
+                        Type init,
+                        Type* norm,
+                        cudaStream_t stream,
+                        MainLambda main_op     = raft::identity_op(),
+                        ReduceLambda reduce_op = raft::add_op(),
+                        FinalLambda final_op   = raft::identity_op())
+{
+  op::csr_row_op<IdxType>(
+    ia,
+    N,
+    nnz,
+    [data, init, norm, main_op, reduce_op, final_op] __device__(
+      IdxType row, IdxType start_idx, IdxType stop_idx) {
+      Type acc = init;  // fold the row locally so final_op is applied exactly once per row
+      for (IdxType i = start_idx; i < stop_idx; i++) { acc = reduce_op(acc, main_op(data[i])); }
+      norm[row] = final_op(acc);
+    },
+    stream);
+}
+
+template <typename Type, typename IdxType, typename Lambda>
+void rowNormCsrCaller(const IdxType* ia,
+                      const Type* data,
+                      IdxType nnz,
+                      IdxType N,
+                      Type* norm,
+                      raft::linalg::NormType type,
+                      Lambda fin_op,
+                      cudaStream_t stream)
+{
+  switch (type) {
+    case raft::linalg::NormType::L1Norm:  // reduction: sum of |x| over each row
+      csr_row_op_wrapper(
+        ia, data, nnz, N, (Type)0, norm, stream, raft::abs_op(), raft::add_op(), fin_op);
+      break;
+    case raft::linalg::NormType::L2Norm:  // reduction: sum of x^2 over each row
+      csr_row_op_wrapper(
+        ia, data, nnz, N, (Type)0, norm, stream, raft::sq_op(), raft::add_op(), fin_op);
+      break;
+    case raft::linalg::NormType::LinfNorm:  // reduction: max of |x| over each row
+      csr_row_op_wrapper(
+        ia, data, nnz, N, (Type)0, norm, stream, raft::abs_op(), raft::max_op(), fin_op);
+      break;
+    default: THROW("Unsupported norm type: %d", type);
+  };
+}
+
 };  // end NAMESPACE detail
 };  // end NAMESPACE linalg
 };  // end NAMESPACE sparse
diff --git a/cpp/include/raft/sparse/linalg/detail/spmm.hpp b/cpp/include/raft/sparse/linalg/detail/spmm.hpp
new file mode 100644
index 0000000000..b61b561a12
--- /dev/null
+++ b/cpp/include/raft/sparse/linalg/detail/spmm.hpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/core/host_mdspan.hpp>
+#include <raft/sparse/detail/cusparse_wrappers.h>
+
+namespace raft {
+namespace sparse {
+namespace linalg {
+namespace detail {
+
+/**
+ * @brief determine common data layout for both dense matrices (asserts that they agree)
+ * @tparam ValueType Data type of Y,Z (float/double)
+ * @tparam IndexType Index type of Y,Z
+ * @tparam LayoutPolicyY layout of Y
+ * @tparam LayoutPolicyZ layout of Z
+ * @param[in] y input raft::device_matrix_view
+ * @param[in] z raft::device_matrix_view whose layout is compared against y
+ * @returns true if y and z both have stride(1) == 1 (row major), false otherwise (col major)
+ */
+template <typename ValueType, typename IndexType, typename LayoutPolicyY, typename LayoutPolicyZ>
+bool is_row_major(raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyY>& y,
+                  raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ>& z)
+{
+  bool is_row_major = z.stride(1) == 1 && y.stride(1) == 1;
+  bool is_col_major = z.stride(0) == 1 && y.stride(0) == 1;
+  ASSERT(is_row_major || is_col_major, "Both matrices need to be either row or col major");
+  return is_row_major;
+}
+
+/**
+ * @brief create a cuSparse dense descriptor
+ * @tparam ValueType Data type of dense_view (float/double)
+ * @tparam IndexType Index type of dense_view
+ * @tparam LayoutPolicy layout of dense_view
+ * @param[in] dense_view input raft::device_matrix_view
+ * @param[in] is_row_major true to describe dense_view as row major, false as col major
+ * @returns dense matrix descriptor to be used by cuSparse API (it is not destroyed here)
+ */
+template <typename ValueType, typename IndexType, typename LayoutPolicy>
+cusparseDnMatDescr_t create_descriptor(
+  raft::device_matrix_view<ValueType, IndexType, LayoutPolicy>& dense_view, const bool is_row_major)
+{
+  auto order   = is_row_major ? CUSPARSE_ORDER_ROW : CUSPARSE_ORDER_COL;
+  IndexType ld = is_row_major ? dense_view.stride(0) : dense_view.stride(1);  // leading dim
+  cusparseDnMatDescr_t descr;
+  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednmat(
+    &descr,
+    dense_view.extent(0),
+    dense_view.extent(1),
+    ld,
+    const_cast<std::remove_const_t<ValueType>*>(dense_view.data_handle()),  // drop const
+    order));
+  return descr;
+}
+
+/**
+ * @brief create a cuSparse sparse descriptor
+ * @tparam ValueType Data type of sparse_view (float/double)
+ * @tparam NZType Type used by sparse_view for its number of non-zeros
+ * @param[in] sparse_view input raft::device_csr_matrix_view (int indptr and int indices only)
+ * @returns sparse matrix descriptor to be used by cuSparse API (it is not destroyed here)
+ */
+template <typename ValueType, typename NZType>
+cusparseSpMatDescr_t create_descriptor(
+  raft::device_csr_matrix_view<ValueType, int, int, NZType>& sparse_view)
+{
+  cusparseSpMatDescr_t descr;
+  auto csr_structure = sparse_view.structure_view();
+  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(
+    &descr,
+    csr_structure.get_n_rows(),
+    csr_structure.get_n_cols(),
+    csr_structure.get_nnz(),
+    const_cast<int*>(csr_structure.get_indptr().data()),   // drop const
+    const_cast<int*>(csr_structure.get_indices().data()),  // drop const
+    const_cast<std::remove_const_t<ValueType>*>(sparse_view.get_elements().data())));
+  return descr;
+}
+
+/**
+ * @brief SPMM function designed for handling all CSR * DENSE
+ * combinations of operand layouts for cuSparse.
+ * It computes the following equation: Z = alpha . X * Y + beta . Z
+ * where X is a sparse (CSR) descriptor and Y,Z are dense descriptors
+ * @tparam ValueType Data type of input/output matrices (float/double)
+ * @note The workspace size reported by cuSparse is a byte count, so it is
+ * rounded up to whole ValueType elements before the buffer is allocated.
+ * @note is_row_major must describe both Y and Z; it selects
+ * CUSPARSE_SPMM_CSR_ALG2 (row major) or CUSPARSE_SPMM_CSR_ALG1 (col major).
+ * @param[in] handle raft handle
+ * @param[in] trans_x transpose operation for X
+ * @param[in] trans_y transpose operation for Y
+ * @param[in] is_row_major data layout of Y,Z
+ * @param[in] alpha scalar
+ * @param[in] descr_x input sparse descriptor
+ * @param[in] descr_y input dense descriptor
+ * @param[in] beta scalar
+ * @param[out] descr_z output dense descriptor
+ */
+template <typename ValueType>
+void spmm(raft::device_resources const& handle,
+          const bool trans_x,
+          const bool trans_y,
+          const bool is_row_major,
+          const ValueType* alpha,
+          cusparseSpMatDescr_t& descr_x,
+          cusparseDnMatDescr_t& descr_y,
+          const ValueType* beta,
+          cusparseDnMatDescr_t& descr_z)
+{
+  auto opX = trans_x ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE;
+  auto opY = trans_y ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE;
+  auto alg = is_row_major ? CUSPARSE_SPMM_CSR_ALG2 : CUSPARSE_SPMM_CSR_ALG1;
+  size_t bufferSize = 0;  // workspace size in bytes, filled in by cuSparse
+  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(),
+                                                                  opX,
+                                                                  opY,
+                                                                  alpha,
+                                                                  descr_x,
+                                                                  descr_y,
+                                                                  beta,
+                                                                  descr_z,
+                                                                  alg,
+                                                                  &bufferSize,
+                                                                  handle.get_stream()));
+
+  raft::interruptible::synchronize(handle.get_stream());
+  size_t tmp_len = (bufferSize + sizeof(ValueType) - 1) / sizeof(ValueType);
+  rmm::device_uvector<ValueType> tmp(tmp_len, handle.get_stream());
+
+  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(handle.get_cusparse_handle(),
+                                                       opX,
+                                                       opY,
+                                                       alpha,
+                                                       descr_x,
+                                                       descr_y,
+                                                       beta,
+                                                       descr_z,
+                                                       alg,
+                                                       tmp.data(),
+                                                       handle.get_stream()));
+}
+
+}  // end namespace detail
+}  // end namespace linalg
+}  // end namespace sparse
+}  // end namespace raft
diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh
index af72d0141e..98e23afcdf 100644
--- a/cpp/include/raft/sparse/linalg/norm.cuh
+++ b/cpp/include/raft/sparse/linalg/norm.cuh
@@ -18,6 +18,7 @@
 
 #pragma once
 
+#include <raft/linalg/norm_types.hpp>
 #include <raft/sparse/linalg/detail/norm.cuh>
 
 namespace raft {
@@ -66,6 +67,38 @@ void csr_row_normalize_max(const int* ia,  // csr row ind array (sorted by row)
   detail::csr_row_normalize_max(ia, vals, nnz, m, result, stream);
 }
 
+/**
+ * @brief Compute row-wise norm of a CSR input matrix and apply the fin_op lambda
+ *
+ * Row-wise norm is useful while computing pairwise distance matrix, for
+ * example.
+ * This is used in many clustering algos like knn, kmeans, dbscan, etc...
+ *
+ * @tparam Type the data type
+ * @tparam IdxType Integer type used for addressing
+ * @tparam Lambda device final lambda
+ * @param handle raft handle (the work is issued on its stream)
+ * @param ia the input matrix row index array
+ * @param data the input matrix nnz data
+ * @param nnz number of elements in data
+ * @param N number of rows
+ * @param norm the output vector of row-wise norm, size [N]
+ * @param type the type of norm to be applied (L1Norm, L2Norm or LinfNorm)
+ * @param fin_op the final lambda op
+ */
+template <typename Type, typename IdxType = int, typename Lambda = raft::identity_op>
+void rowNormCsr(raft::device_resources const& handle,
+                const IdxType* ia,
+                const Type* data,
+                const IdxType nnz,
+                const IdxType N,
+                Type* norm,
+                raft::linalg::NormType type,
+                Lambda fin_op = raft::identity_op())
+{
+  detail::rowNormCsrCaller(ia, data, nnz, N, norm, type, fin_op, handle.get_stream());
+}
+
 };  // end NAMESPACE linalg
 };  // end NAMESPACE sparse
 };  // end NAMESPACE raft
diff --git a/cpp/include/raft/sparse/linalg/spmm.cuh b/cpp/include/raft/sparse/linalg/spmm.cuh
new file mode 100644
index 0000000000..73170cfc70
--- /dev/null
+++ b/cpp/include/raft/sparse/linalg/spmm.cuh
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __SPMM_H
+#define __SPMM_H
+
+#pragma once
+
+#include "detail/spmm.hpp"
+
+namespace raft {
+namespace sparse {
+namespace linalg {
+
+/**
+ * @brief SPMM function designed for handling all CSR * DENSE
+ * combinations of operand layouts for cuSparse.
+ * It computes the following equation: Z = alpha . X * Y + beta . Z
+ * where X is a CSR device matrix view and Y,Z are device matrix views
+ * @tparam ValueType Data type of input/output matrices (float/double)
+ * @tparam IndexType Index type of Y and Z
+ * @tparam NZType Type used by X for its number of non-zeros
+ * @tparam LayoutPolicyY layout of Y
+ * @tparam LayoutPolicyZ layout of Z
+ * @param[in] handle raft handle
+ * @param[in] trans_x transpose operation for X
+ * @param[in] trans_y transpose operation for Y
+ * @param[in] alpha pointer to the scalar alpha
+ * @param[in] x input raft::device_csr_matrix_view
+ * @param[in] y input raft::device_matrix_view
+ * @param[in] beta pointer to the scalar beta
+ * @param[in,out] z output raft::device_matrix_view (it also appears on the right-hand side)
+ */
+template <typename ValueType,
+          typename IndexType,
+          typename NZType,
+          typename LayoutPolicyY,
+          typename LayoutPolicyZ>
+void spmm(raft::device_resources const& handle,
+          const bool trans_x,
+          const bool trans_y,
+          const ValueType* alpha,
+          raft::device_csr_matrix_view<const ValueType, int, int, NZType> x,
+          raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyY> y,
+          const ValueType* beta,
+          raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ> z)
+{
+  bool is_row_major = detail::is_row_major(y, z);
+  // One cuSparse descriptor per operand; Y and Z use the layout detected above.
+  auto descr_x = detail::create_descriptor(x);
+  auto descr_y = detail::create_descriptor(y, is_row_major);
+  auto descr_z = detail::create_descriptor(z, is_row_major);
+
+  detail::spmm(handle, trans_x, trans_y, is_row_major, alpha, descr_x, descr_y, beta, descr_z);
+  // NOTE(review): if the call above throws, the descriptors below are never destroyed.
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr_x));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr_y));
+  RAFT_CUSPARSE_TRY_NO_THROW(cusparseDestroyDnMat(descr_z));
+  RAFT_CUDA_TRY(cudaPeekAtLastError());
+}
+
+}  // end namespace linalg
+}  // end namespace sparse
+}  // end namespace raft
+
+#endif
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 22e8a9d73c..c8d4f91ec0 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -236,6 +236,7 @@ if(BUILD_TESTS)
     test/sparse/degree.cu
     test/sparse/filter.cu
     test/sparse/norm.cu
+    test/sparse/normalize.cu
     test/sparse/reduce.cu
     test/sparse/row_op.cu
     test/sparse/sort.cu
@@ -244,7 +245,14 @@ if(BUILD_TESTS)
   )
 
   ConfigureTest(
-    NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu OPTIONAL LIB
+    NAME 
+    SPARSE_DIST_TEST 
+    PATH 
+    test/sparse/dist_coo_spmv.cu 
+    test/sparse/distance.cu 
+    test/sparse/gram.cu 
+    OPTIONAL 
+    LIB
   )
 
   ConfigureTest(
diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu
index f99d02dc7f..47da201465 100644
--- a/cpp/test/distance/gram.cu
+++ b/cpp/test/distance/gram.cu
@@ -19,6 +19,7 @@
 #endif
 
 #include "../test_utils.cuh"
+#include "gram_base.cuh"
 #include <gtest/gtest.h>
 #include <iostream>
 #include <memory>
@@ -31,12 +32,6 @@
 
 namespace raft::distance::kernels {
 
-// Get the offset of element [i,k].
-HDI int get_offset(int i, int k, int ld, bool is_row_major)
-{
-  return is_row_major ? i * ld + k : i + k * ld;
-}
-
 struct GramMatrixInputs {
   int n1;      // feature vectors in matrix 1
   int n2;      // featuer vectors in matrix 2
@@ -110,62 +105,46 @@ class GramMatrixTest : public ::testing::TestWithParam<GramMatrixInputs> {
 
   ~GramMatrixTest() override { RAFT_CUDA_TRY_NO_THROW(cudaStreamDestroy(stream)); }
 
-  // Calculate the Gram matrix on the host.
-  void naiveKernel()
-  {
-    std::vector<math_t> x1_host(x1.size());
-    raft::update_host(x1_host.data(), x1.data(), x1.size(), stream);
-    std::vector<math_t> x2_host(x2.size());
-    raft::update_host(x2_host.data(), x2.data(), x2.size(), stream);
-    handle.sync_stream(stream);
-
-    for (int i = 0; i < params.n1; i++) {
-      for (int j = 0; j < params.n2; j++) {
-        float d = 0;
-        for (int k = 0; k < params.n_cols; k++) {
-          if (params.kernel.kernel == KernelType::RBF) {
-            math_t diff = x1_host[get_offset(i, k, params.ld1, params.is_row_major)] -
-                          x2_host[get_offset(j, k, params.ld2, params.is_row_major)];
-            d += diff * diff;
-          } else {
-            d += x1_host[get_offset(i, k, params.ld1, params.is_row_major)] *
-                 x2_host[get_offset(j, k, params.ld2, params.is_row_major)];
-          }
-        }
-        int idx  = get_offset(i, j, params.ld_out, params.is_row_major);
-        math_t v = 0;
-        switch (params.kernel.kernel) {
-          case (KernelType::LINEAR): gram_host[idx] = d; break;
-          case (KernelType::POLYNOMIAL):
-            v              = params.kernel.gamma * d + params.kernel.coef0;
-            gram_host[idx] = std::pow(v, params.kernel.degree);
-            break;
-          case (KernelType::TANH):
-            gram_host[idx] = std::tanh(params.kernel.gamma * d + params.kernel.coef0);
-            break;
-          case (KernelType::RBF): gram_host[idx] = exp(-params.kernel.gamma * d); break;
-        }
-      }
-    }
-  }
-
   void runTest()
   {
-    std::unique_ptr<GramMatrixBase<math_t>> kernel = std::unique_ptr<GramMatrixBase<math_t>>(
-      KernelFactory<math_t>::create(params.kernel, handle.get_cublas_handle()));
-
-    kernel->evaluate(x1.data(),
-                     params.n1,
-                     params.n_cols,
-                     x2.data(),
-                     params.n2,
-                     gram.data(),
-                     params.is_row_major,
-                     stream,
-                     params.ld1,
-                     params.ld2,
-                     params.ld_out);
-    naiveKernel();
+    std::unique_ptr<GramMatrixBase<math_t>> kernel =
+      std::unique_ptr<GramMatrixBase<math_t>>(KernelFactory<math_t>::create(params.kernel));
+
+    auto x1_span =
+      params.is_row_major
+        ? raft::make_device_strided_matrix_view<const math_t, int, raft::layout_c_contiguous>(
+            x1.data(), params.n1, params.n_cols, params.ld1)
+        : raft::make_device_strided_matrix_view<const math_t, int, raft::layout_f_contiguous>(
+            x1.data(), params.n1, params.n_cols, params.ld1);
+    auto x2_span =
+      params.is_row_major
+        ? raft::make_device_strided_matrix_view<const math_t, int, raft::layout_c_contiguous>(
+            x2.data(), params.n2, params.n_cols, params.ld2)
+        : raft::make_device_strided_matrix_view<const math_t, int, raft::layout_f_contiguous>(
+            x2.data(), params.n2, params.n_cols, params.ld2);
+    auto out_span =
+      params.is_row_major
+        ? raft::make_device_strided_matrix_view<math_t, int, raft::layout_c_contiguous>(
+            gram.data(), params.n1, params.n2, params.ld_out)
+        : raft::make_device_strided_matrix_view<math_t, int, raft::layout_f_contiguous>(
+            gram.data(), params.n1, params.n2, params.ld_out);
+
+    (*kernel)(handle, x1_span, x2_span, out_span);
+
+    naiveGramMatrixKernel(params.n1,
+                          params.n2,
+                          params.n_cols,
+                          x1,
+                          x2,
+                          gram_host.data(),
+                          params.ld1,
+                          params.ld2,
+                          params.ld_out,
+                          params.is_row_major,
+                          params.kernel,
+                          stream,
+                          handle);
+
     ASSERT_TRUE(raft::devArrMatchHost(
       gram_host.data(), gram.data(), gram.size(), raft::CompareApprox<math_t>(1e-6f)));
   }
diff --git a/cpp/test/distance/gram_base.cuh b/cpp/test/distance/gram_base.cuh
new file mode 100644
index 0000000000..8c0652bc16
--- /dev/null
+++ b/cpp/test/distance/gram_base.cuh
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <memory>
+#include <raft/distance/distance_types.hpp>
+#include <raft/distance/kernels.cuh>
+#include <raft/util/cuda_utils.cuh>
+#include <raft/util/cudart_utils.hpp>
+#include <rmm/device_uvector.hpp>
+
+namespace raft {
+namespace distance {
+namespace kernels {
+
+// Get the linear offset of element [i,k] for leading dimension ld (row or col major).
+HDI int get_offset(int i, int k, int ld, bool is_row_major)
+{
+  return is_row_major ? i * ld + k : i + k * ld;
+}
+
+// Calculate the reference Gram matrix on the host from copies of the device inputs.
+template <typename math_t>
+void naiveGramMatrixKernel(int n1,                                 // rows of x1
+                           int n2,                                 // rows of x2
+                           int n_cols,                             // elements per row
+                           const rmm::device_uvector<math_t>& x1,  // device input 1
+                           const rmm::device_uvector<math_t>& x2,  // device input 2
+                           math_t* gram_host,                      // host output, n1 x n2
+                           int ld1,                                // leading dimension of x1
+                           int ld2,                                // leading dimension of x2
+                           int ld_out,                             // leading dim of gram_host
+                           bool is_row_major,                      // layout of all matrices
+                           KernelParams kernel,                    // kernel type and params
+                           cudaStream_t stream,                    // stream used for the copies
+                           const raft::device_resources& handle)
+{
+  std::vector<math_t> x1_host(x1.size());
+  raft::update_host(x1_host.data(), x1.data(), x1.size(), stream);
+  std::vector<math_t> x2_host(x2.size());
+  raft::update_host(x2_host.data(), x2.data(), x2.size(), stream);
+  handle.sync_stream(stream);
+  // d is the dot product of rows i and j, or their squared distance for the RBF kernel.
+  for (int i = 0; i < n1; i++) {
+    for (int j = 0; j < n2; j++) {
+      math_t d = 0;  // accumulate in math_t so a double reference is not truncated to float
+      for (int k = 0; k < n_cols; k++) {
+        if (kernel.kernel == KernelType::RBF) {
+          math_t diff = x1_host[get_offset(i, k, ld1, is_row_major)] -
+                        x2_host[get_offset(j, k, ld2, is_row_major)];
+          d += diff * diff;
+        } else {
+          d += x1_host[get_offset(i, k, ld1, is_row_major)] *
+               x2_host[get_offset(j, k, ld2, is_row_major)];
+        }
+      }
+      int idx  = get_offset(i, j, ld_out, is_row_major);
+      math_t v = 0;
+      switch (kernel.kernel) {
+        case (KernelType::LINEAR): gram_host[idx] = d; break;
+        case (KernelType::POLYNOMIAL):
+          v              = kernel.gamma * d + kernel.coef0;
+          gram_host[idx] = std::pow(v, kernel.degree);
+          break;
+        case (KernelType::TANH): gram_host[idx] = std::tanh(kernel.gamma * d + kernel.coef0); break;
+        case (KernelType::RBF): gram_host[idx] = std::exp(-kernel.gamma * d); break;
+      }
+    }
+  }
+}
+
+}  // namespace kernels
+}  // namespace distance
+}  // namespace raft
diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu
new file mode 100644
index 0000000000..86a2e0cf43
--- /dev/null
+++ b/cpp/test/sparse/gram.cu
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined RAFT_DISTANCE_COMPILED
+#include <raft/distance/specializations.cuh>
+#endif
+
+#include "../distance/gram_base.cuh"
+#include "../test_utils.cuh"
+#include <gtest/gtest.h>
+#include <iostream>
+#include <memory>
+#include <raft/distance/distance_types.hpp>
+#include <raft/distance/kernels.cuh>
+#include <raft/random/rng.cuh>
+#include <raft/sparse/convert/dense.cuh>
+#include <raft/util/cuda_utils.cuh>
+#include <raft/util/cudart_utils.hpp>
+#include <raft/util/itertools.hpp>
+#include <rmm/device_uvector.hpp>
+
+namespace raft::distance::kernels {
+
+/**
+ * Structure to describe structure of the input matrices:
+ *  - DENSE: dense, dense
+ *  - MIX: CSR, dense
+ *  - CSR: CSR, CSR
+ */
+enum SparseType { DENSE, MIX, CSR };
+
+struct GramMatrixInputs {
+  int n1;      // feature vectors in matrix 1
+  int n2;      // feature vectors in matrix 2
+  int n_cols;  // number of elements in a feature vector
+  bool is_row_major;
+  SparseType sparse_input;  // which of the two inputs are passed as CSR
+  KernelParams kernel;
+  int ld1;     // leading dimension of matrix 1 (0: derived from the shape)
+  int ld2;     // leading dimension of matrix 2 (0: derived from the shape)
+  int ld_out;  // leading dimension of the output (0: derived from the shape)
+  // We will generate random input using the dimensions given here.
+  // The reference output is calculated on the host by naiveGramMatrixKernel.
+};
+
+std::ostream& operator<<(std::ostream& os, const GramMatrixInputs& p)
+{  // Writes the parameters as a '/'-separated label: sizes, layout, input kind, kernel, lds.
+  std::vector<std::string> kernel_names{"linear", "poly", "rbf", "tanh"};  // by p.kernel.kernel
+  os << "/" << p.n1 << "x" << p.n2 << "x" << p.n_cols << "/"
+     << (p.is_row_major ? "RowMajor/" : "ColMajor/")
+     << (p.sparse_input == SparseType::DENSE
+           ? "DenseDense/"
+           : (p.sparse_input == SparseType::MIX ? "CsrDense/" : "CsrCsr/"))
+     << kernel_names[p.kernel.kernel] << "/ld_" << p.ld1 << "x" << p.ld2 << "x" << p.ld_out;
+  return os;
+}
+
+/*struct KernelParams {
+  // Kernel function parameters
+  KernelType kernel;  //!< Type of the kernel function
+  int degree;         //!< Degree of polynomial kernel (ignored by others)
+  double gamma;       //!< multiplier in the
+  double coef0;       //!< additive constant in poly and tanh kernels
+};*/
+
+// const KernelParams linear_kernel_params{.kernel=KernelType::LINEAR};
+
+// {KernelType::POLYNOMIAL, 2, 0.5, 2.4}, {KernelType::TANH, 0, 0.5, 2.4}, {KernelType::RBF, 0, 0.5}
+const std::vector<GramMatrixInputs> inputs = raft::util::itertools::product<GramMatrixInputs>(
+  {42},
+  {137},
+  {2},
+  {true, false},
+  {SparseType::DENSE, SparseType::MIX, SparseType::CSR},
+  {KernelParams{KernelType::LINEAR},
+   KernelParams{KernelType::POLYNOMIAL, 2, 0.5, 2.4},
+   KernelParams{KernelType::TANH, 0, 0.5, 2.4},
+   KernelParams{KernelType::RBF, 0, 0.5}});
+
+// (ld_1, ld_2, ld_out) not supported by RBF and CSR
+const std::vector<GramMatrixInputs> inputs_ld = raft::util::itertools::product<GramMatrixInputs>(
+  {137},
+  {42},
+  {2},
+  {true, false},
+  {SparseType::DENSE, SparseType::MIX},
+  {KernelParams{KernelType::LINEAR},
+   KernelParams{KernelType::POLYNOMIAL, 2, 0.5, 2.4},
+   KernelParams{KernelType::TANH, 0, 0.5, 2.4}},
+  {159},
+  {73},
+  {144});
+
+// (ld_1, ld_2) are supported by CSR
+const std::vector<GramMatrixInputs> inputs_ld_csr =
+  raft::util::itertools::product<GramMatrixInputs>(
+    {42},
+    {137},
+    {2},
+    {true, false},
+    {SparseType::CSR, SparseType::MIX},
+    {KernelParams{KernelType::LINEAR},
+     KernelParams{KernelType::POLYNOMIAL, 2, 0.5, 2.4},
+     KernelParams{KernelType::TANH, 0, 0.5, 2.4}},
+    {64},
+    {155},
+    {0});
+
+template <typename math_t>
+class GramMatrixTest : public ::testing::TestWithParam<GramMatrixInputs> {
+ protected:
+  GramMatrixTest()
+    : params(GetParam()),
+      stream(0),  // every buffer below starts empty on stream 0; a new stream is created later
+      x1(0, stream),
+      x2(0, stream),
+      x1_csr_indptr(0, stream),
+      x1_csr_indices(0, stream),
+      x1_csr_data(0, stream),
+      x2_csr_indptr(0, stream),
+      x2_csr_indices(0, stream),
+      x2_csr_data(0, stream),
+      gram(0, stream),
+      gram_host(0)
+  {
+    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+    // A leading dimension of 0 is replaced by the one implied by the shape and layout.
+    if (params.ld1 == 0) { params.ld1 = params.is_row_major ? params.n_cols : params.n1; }
+    if (params.ld2 == 0) { params.ld2 = params.is_row_major ? params.n_cols : params.n2; }
+    if (params.ld_out == 0) { params.ld_out = params.is_row_major ? params.n2 : params.n1; }
+    // Derive the size of each buffer from the offset of its last element.
+    size_t size = get_offset(params.n1 - 1, params.n_cols - 1, params.ld1, params.is_row_major) + 1;
+    x1.resize(size, stream);
+    size = get_offset(params.n2 - 1, params.n_cols - 1, params.ld2, params.is_row_major) + 1;
+    x2.resize(size, stream);
+    size = get_offset(params.n1 - 1, params.n2 - 1, params.ld_out, params.is_row_major) + 1;
+
+    gram.resize(size, stream);
+    RAFT_CUDA_TRY(cudaMemsetAsync(gram.data(), 0, gram.size() * sizeof(math_t), stream));
+    gram_host.resize(gram.size());
+    std::fill(gram_host.begin(), gram_host.end(), 0);
+    // Fill both inputs with uniform random values between 0 and 1 from a fixed seed.
+    raft::random::Rng r(42137ULL);
+    r.uniform(x1.data(), x1.size(), math_t(0), math_t(1), stream);
+    r.uniform(x2.data(), x2.size(), math_t(0), math_t(1), stream);
+  }
+
+  ~GramMatrixTest() override { RAFT_CUDA_TRY_NO_THROW(cudaStreamDestroy(stream)); }
+
+  int prepareCsr(math_t* dense, int n_rows, int ld, int* indptr, int* indices, math_t* data)
+  {  // Builds a CSR copy of `dense` on the host, uploads it and returns its nnz.
+    int nnz           = 0;
+    double eps        = 1e-6;  // entries that are not greater than eps are dropped
+    int n_cols        = params.n_cols;
+    bool is_row_major = params.is_row_major;
+    size_t dense_size = get_offset(n_rows - 1, n_cols - 1, ld, is_row_major) + 1;
+
+    std::vector<math_t> dense_host(dense_size);
+    raft::update_host(dense_host.data(), dense, dense_size, stream);
+    handle.sync_stream(stream);
+    // Host staging buffers, sized for the worst case of n_rows * n_cols non-zeros.
+    std::vector<int> indptr_host(n_rows + 1);
+    std::vector<int> indices_host(n_rows * n_cols);
+    std::vector<math_t> data_host(n_rows * n_cols);
+
+    // create csr matrix from dense (with threshold)
+    for (int i = 0; i < n_rows; ++i) {
+      indptr_host[i] = nnz;
+      for (int j = 0; j < n_cols; ++j) {
+        math_t value = dense_host[get_offset(i, j, ld, is_row_major)];
+        if (value > eps) {
+          indices_host[nnz] = j;
+          data_host[nnz]    = value;
+          nnz++;
+        }
+      }
+    }
+    indptr_host[n_rows] = nnz;
+
+    // fill back dense matrix from CSR so that dense and CSR hold exactly the same values
+    std::fill(dense_host.data(), dense_host.data() + dense_size, 0);
+    for (int i = 0; i < n_rows; ++i) {
+      for (int idx = indptr_host[i]; idx < indptr_host[i + 1]; ++idx) {
+        dense_host[get_offset(i, indices_host[idx], ld, is_row_major)] = data_host[idx];
+      }
+    }
+    // dense, indptr, indices and data are device pointers; only nnz CSR entries are copied.
+    raft::update_device(dense, dense_host.data(), dense_size, stream);
+    raft::update_device(indptr, indptr_host.data(), n_rows + 1, stream);
+    raft::update_device(indices, indices_host.data(), nnz, stream);
+    raft::update_device(data, data_host.data(), nnz, stream);
+    handle.sync_stream(stream);
+
+    return nnz;
+  }
+
+  void runTest()
+  {  // Runs the kernel under test, then compares the result against the host reference.
+    std::unique_ptr<GramMatrixBase<math_t>> kernel =
+      std::unique_ptr<GramMatrixBase<math_t>>(KernelFactory<math_t>::create(params.kernel));
+    // Strided views of the dense inputs and of the output; ld* is passed as the stride.
+    auto x1_span =
+      params.is_row_major
+        ? raft::make_device_strided_matrix_view<const math_t, int, raft::layout_c_contiguous>(
+            x1.data(), params.n1, params.n_cols, params.ld1)
+        : raft::make_device_strided_matrix_view<const math_t, int, raft::layout_f_contiguous>(
+            x1.data(), params.n1, params.n_cols, params.ld1);
+    auto x2_span =
+      params.is_row_major
+        ? raft::make_device_strided_matrix_view<const math_t, int, raft::layout_c_contiguous>(
+            x2.data(), params.n2, params.n_cols, params.ld2)
+        : raft::make_device_strided_matrix_view<const math_t, int, raft::layout_f_contiguous>(
+            x2.data(), params.n2, params.n_cols, params.ld2);
+    auto out_span =
+      params.is_row_major
+        ? raft::make_device_strided_matrix_view<math_t, int, raft::layout_c_contiguous>(
+            gram.data(), params.n1, params.n2, params.ld_out)
+        : raft::make_device_strided_matrix_view<math_t, int, raft::layout_f_contiguous>(
+            gram.data(), params.n1, params.n2, params.ld_out);
+    // Dispatch on the input kind: dense/dense, CSR/dense or CSR/CSR.
+    if (params.sparse_input == SparseType::DENSE) {
+      (*kernel)(handle, x1_span, x2_span, out_span);
+    } else {
+      x1_csr_indptr.resize(params.n1 + 1, stream);  // resize so size() covers what is written
+      x1_csr_indices.resize(params.n1 * params.n_cols, stream);
+      x1_csr_data.resize(params.n1 * params.n_cols, stream);
+      int x1_nnz = prepareCsr(x1.data(),
+                              params.n1,
+                              params.ld1,
+                              x1_csr_indptr.data(),
+                              x1_csr_indices.data(),
+                              x1_csr_data.data());
+
+      auto x1_csr_structure = raft::make_device_compressed_structure_view<int, int, int>(
+        x1_csr_indptr.data(), x1_csr_indices.data(), params.n1, params.n_cols, x1_nnz);
+      auto x1_csr = raft::device_csr_matrix_view<const math_t, int, int, int>(
+        raft::device_span<const math_t>(x1_csr_data.data(), x1_csr_structure.get_nnz()),
+        x1_csr_structure);
+
+      if (params.sparse_input == SparseType::MIX) {
+        (*kernel)(handle, x1_csr, x2_span, out_span);
+      } else {
+        x2_csr_indptr.resize(params.n2 + 1, stream);
+        x2_csr_indices.resize(params.n2 * params.n_cols, stream);
+        x2_csr_data.resize(params.n2 * params.n_cols, stream);
+        int x2_nnz = prepareCsr(x2.data(),
+                                params.n2,
+                                params.ld2,
+                                x2_csr_indptr.data(),
+                                x2_csr_indices.data(),
+                                x2_csr_data.data());
+
+        auto x2_csr_structure = raft::make_device_compressed_structure_view<int, int, int>(
+          x2_csr_indptr.data(), x2_csr_indices.data(), params.n2, params.n_cols, x2_nnz);
+        auto x2_csr = raft::device_csr_matrix_view<const math_t, int, int, int>(
+          raft::device_span<const math_t>(x2_csr_data.data(), x2_csr_structure.get_nnz()),
+          x2_csr_structure);
+
+        (*kernel)(handle, x1_csr, x2_csr, out_span);
+      }
+    }
+    // Host reference; for CSR inputs prepareCsr has already written the kept values back.
+    naiveGramMatrixKernel(params.n1,
+                          params.n2,
+                          params.n_cols,
+                          x1,
+                          x2,
+                          gram_host.data(),
+                          params.ld1,
+                          params.ld2,
+                          params.ld_out,
+                          params.is_row_major,
+                          params.kernel,
+                          stream,
+                          handle);
+
+    handle.sync_stream(stream);
+
+    ASSERT_TRUE(raft::devArrMatchHost(
+      gram_host.data(), gram.data(), gram.size(), raft::CompareApprox<math_t>(1e-6f)));
+  }
+
+  raft::device_resources handle;
+  cudaStream_t stream = 0;
+  GramMatrixInputs params;
+
+  rmm::device_uvector<math_t> x1;
+  rmm::device_uvector<math_t> x2;
+
+  rmm::device_uvector<int> x1_csr_indptr;
+  rmm::device_uvector<int> x1_csr_indices;
+  rmm::device_uvector<math_t> x1_csr_data;
+  rmm::device_uvector<int> x2_csr_indptr;
+  rmm::device_uvector<int> x2_csr_indices;
+  rmm::device_uvector<math_t> x2_csr_data;
+
+  rmm::device_uvector<math_t> gram;
+  std::vector<math_t> gram_host;
+};
+
+typedef GramMatrixTest<float> GramMatrixTestFloatStandard;
+typedef GramMatrixTest<float> GramMatrixTestFloatLd;
+typedef GramMatrixTest<float> GramMatrixTestFloatLdCsr;
+
+TEST_P(GramMatrixTestFloatStandard, Gram) { runTest(); }
+TEST_P(GramMatrixTestFloatLd, Gram) { runTest(); }
+TEST_P(GramMatrixTestFloatLdCsr, Gram) { runTest(); }
+
+INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloatStandard, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_SUITE_P(GramMatrixTests, GramMatrixTestFloatLd, ::testing::ValuesIn(inputs_ld));
+INSTANTIATE_TEST_SUITE_P(GramMatrixTests,
+                         GramMatrixTestFloatLdCsr,
+                         ::testing::ValuesIn(inputs_ld_csr));
+};  // end namespace raft::distance::kernels
diff --git a/cpp/test/sparse/norm.cu b/cpp/test/sparse/norm.cu
index 91b7b09fcc..65d857652c 100644
--- a/cpp/test/sparse/norm.cu
+++ b/cpp/test/sparse/norm.cu
@@ -19,7 +19,7 @@
 #include "../test_utils.cuh"
 
 #include <raft/core/device_resources.hpp>
-#include <raft/sparse/csr.hpp>
+#include <raft/linalg/norm_types.hpp>
 #include <raft/sparse/linalg/norm.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -29,26 +29,24 @@
 namespace raft {
 namespace sparse {
 
-enum NormalizeMethod { MAX, L1 };
-
 template <typename Type_f, typename Index_>
-struct CSRRowNormalizeInputs {
-  NormalizeMethod method;
-  std::vector<Index_> ex_scan;
-  std::vector<Type_f> in_vals;
+struct CSRRowNormInputs {
+  raft::linalg::NormType norm;
+  std::vector<Index_> indptr;
+  std::vector<Type_f> data;
   std::vector<Type_f> verify;
 };
 
 template <typename Type_f, typename Index_>
-class CSRRowNormalizeTest : public ::testing::TestWithParam<CSRRowNormalizeInputs<Type_f, Index_>> {
+class CSRRowNormTest : public ::testing::TestWithParam<CSRRowNormInputs<Type_f, Index_>> {
  public:
-  CSRRowNormalizeTest()
-    : params(::testing::TestWithParam<CSRRowNormalizeInputs<Type_f, Index_>>::GetParam()),
+  CSRRowNormTest()
+    : params(::testing::TestWithParam<CSRRowNormInputs<Type_f, Index_>>::GetParam()),
       stream(handle.get_stream()),
-      in_vals(params.in_vals.size(), stream),
-      verify(params.verify.size(), stream),
-      ex_scan(params.ex_scan.size(), stream),
-      result(params.verify.size(), stream)
+      data(params.data.size(), stream),
+      verify(params.indptr.size() - 1, stream),
+      indptr(params.indptr.size(), stream),
+      result(params.indptr.size() - 1, stream)
   {
   }
 
@@ -57,71 +55,66 @@ class CSRRowNormalizeTest : public ::testing::TestWithParam<CSRRowNormalizeInput
 
   void Run()
   {
-    Index_ n_rows = params.ex_scan.size();
-    Index_ nnz    = params.in_vals.size();
-
-    raft::update_device(ex_scan.data(), params.ex_scan.data(), n_rows, stream);
-    raft::update_device(in_vals.data(), params.in_vals.data(), nnz, stream);
-    raft::update_device(verify.data(), params.verify.data(), nnz, stream);
-
-    switch (params.method) {
-      case MAX:
-        linalg::csr_row_normalize_max<Type_f>(
-          ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream);
-        break;
-      case L1:
-        linalg::csr_row_normalize_l1<Type_f>(
-          ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream);
-        break;
-    }
+    Index_ n_rows = params.indptr.size() - 1;
+    Index_ nnz    = params.data.size();
+
+    raft::update_device(indptr.data(), params.indptr.data(), n_rows + 1, stream);
+    raft::update_device(data.data(), params.data.data(), nnz, stream);
+    raft::update_device(verify.data(), params.verify.data(), n_rows, stream);
+
+    linalg::rowNormCsr(handle, indptr.data(), data.data(), nnz, n_rows, result.data(), params.norm);
     RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
 
     ASSERT_TRUE(
-      raft::devArrMatch<Type_f>(verify.data(), result.data(), nnz, raft::Compare<Type_f>()));
+      raft::devArrMatch<Type_f>(verify.data(), result.data(), n_rows, raft::Compare<Type_f>()));
   }
 
  protected:
   raft::device_resources handle;
   cudaStream_t stream;
 
-  CSRRowNormalizeInputs<Type_f, Index_> params;
-  rmm::device_uvector<Index_> ex_scan;
-  rmm::device_uvector<Type_f> in_vals, result, verify;
+  CSRRowNormInputs<Type_f, Index_> params;
+  rmm::device_uvector<Index_> indptr;
+  rmm::device_uvector<Type_f> data, result, verify;
 };
 
-using CSRRowNormalizeTestF = CSRRowNormalizeTest<float, int>;
-TEST_P(CSRRowNormalizeTestF, Result) { Run(); }
-
-using CSRRowNormalizeTestD = CSRRowNormalizeTest<double, int>;
-TEST_P(CSRRowNormalizeTestD, Result) { Run(); }
-
-const std::vector<CSRRowNormalizeInputs<float, int>> csrnormalize_inputs_f = {
-  {MAX,
-   {0, 4, 8, 9},
-   {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0},
-   {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}},
-  {L1,
-   {0, 4, 8, 9},
-   {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0},
-   {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}},
+using CSRRowNormTestF = CSRRowNormTest<float, int>;
+TEST_P(CSRRowNormTestF, Result) { Run(); }
+
+using CSRRowNormTestD = CSRRowNormTest<double, int>;
+TEST_P(CSRRowNormTestD, Result) { Run(); }
+
+const std::vector<CSRRowNormInputs<float, int>> csrnorm_inputs_f = {
+  {raft::linalg::NormType::LinfNorm,
+   {0, 3, 7, 10},
+   {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0},
+   {5.0, 10.0, 2.0}},
+  {raft::linalg::NormType::L1Norm,
+   {0, 3, 7, 10},
+   {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0},
+   {8.0, 13.0, 4.0}},
+  {raft::linalg::NormType::L2Norm,
+   {0, 3, 7, 10},
+   {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0},
+   {30.0, 105.0, 6.0}},
 };
-const std::vector<CSRRowNormalizeInputs<double, int>> csrnormalize_inputs_d = {
-  {MAX,
-   {0, 4, 8, 9},
-   {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0},
-   {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}},
-  {L1,
-   {0, 4, 8, 9},
-   {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0},
-   {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}},
+const std::vector<CSRRowNormInputs<double, int>> csrnorm_inputs_d = {
+  {raft::linalg::NormType::LinfNorm,
+   {0, 3, 7, 10},
+   {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0},
+   {5.0, 10.0, 2.0}},
+  {raft::linalg::NormType::L1Norm,
+   {0, 3, 7, 10},
+   {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0},
+   {8.0, 13.0, 4.0}},
+  {raft::linalg::NormType::L2Norm,
+   {0, 3, 7, 10},
+   {5.0, 1.0, 2.0, 0.0, 10.0, 1.0, 2.0, 1.0, 1.0, 2.0},
+   {30.0, 105.0, 6.0}},
 };
 
-INSTANTIATE_TEST_CASE_P(SparseNormTest,
-                        CSRRowNormalizeTestF,
-                        ::testing::ValuesIn(csrnormalize_inputs_f));
-INSTANTIATE_TEST_CASE_P(SparseNormTest,
-                        CSRRowNormalizeTestD,
-                        ::testing::ValuesIn(csrnormalize_inputs_d));
+INSTANTIATE_TEST_CASE_P(SparseNormTest, CSRRowNormTestF, ::testing::ValuesIn(csrnorm_inputs_f));
+INSTANTIATE_TEST_CASE_P(SparseNormTest, CSRRowNormTestD, ::testing::ValuesIn(csrnorm_inputs_d));
 
 }  // namespace sparse
 }  // namespace raft
diff --git a/cpp/test/sparse/normalize.cu b/cpp/test/sparse/normalize.cu
new file mode 100644
index 0000000000..91b7b09fcc
--- /dev/null
+++ b/cpp/test/sparse/normalize.cu
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../test_utils.cuh"
+
+#include <raft/core/device_resources.hpp>
+#include <raft/sparse/csr.hpp>
+#include <raft/sparse/linalg/norm.cuh>
+#include <raft/util/cudart_utils.hpp>
+
+#include <iostream>
+#include <limits>
+
+namespace raft {
+namespace sparse {
+
+enum NormalizeMethod { MAX, L1 };
+
+template <typename Type_f, typename Index_>
+struct CSRRowNormalizeInputs {
+  NormalizeMethod method;
+  std::vector<Index_> ex_scan;
+  std::vector<Type_f> in_vals;
+  std::vector<Type_f> verify;
+};
+
+template <typename Type_f, typename Index_>
+class CSRRowNormalizeTest : public ::testing::TestWithParam<CSRRowNormalizeInputs<Type_f, Index_>> {
+ public:
+  CSRRowNormalizeTest()
+    : params(::testing::TestWithParam<CSRRowNormalizeInputs<Type_f, Index_>>::GetParam()),
+      stream(handle.get_stream()),
+      in_vals(params.in_vals.size(), stream),
+      verify(params.verify.size(), stream),
+      ex_scan(params.ex_scan.size(), stream),
+      result(params.verify.size(), stream)
+  {
+  }
+
+ protected:
+  void SetUp() override {}
+
+  void Run()
+  {
+    Index_ n_rows = params.ex_scan.size();
+    Index_ nnz    = params.in_vals.size();
+
+    raft::update_device(ex_scan.data(), params.ex_scan.data(), n_rows, stream);
+    raft::update_device(in_vals.data(), params.in_vals.data(), nnz, stream);
+    raft::update_device(verify.data(), params.verify.data(), nnz, stream);
+
+    switch (params.method) {
+      case MAX:
+        linalg::csr_row_normalize_max<Type_f>(
+          ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream);
+        break;
+      case L1:
+        linalg::csr_row_normalize_l1<Type_f>(
+          ex_scan.data(), in_vals.data(), nnz, n_rows, result.data(), stream);
+        break;
+    }
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+
+    ASSERT_TRUE(
+      raft::devArrMatch<Type_f>(verify.data(), result.data(), nnz, raft::Compare<Type_f>()));
+  }
+
+ protected:
+  raft::device_resources handle;
+  cudaStream_t stream;
+
+  CSRRowNormalizeInputs<Type_f, Index_> params;
+  rmm::device_uvector<Index_> ex_scan;
+  rmm::device_uvector<Type_f> in_vals, result, verify;
+};
+
+using CSRRowNormalizeTestF = CSRRowNormalizeTest<float, int>;
+TEST_P(CSRRowNormalizeTestF, Result) { Run(); }
+
+using CSRRowNormalizeTestD = CSRRowNormalizeTest<double, int>;
+TEST_P(CSRRowNormalizeTestD, Result) { Run(); }
+
+const std::vector<CSRRowNormalizeInputs<float, int>> csrnormalize_inputs_f = {
+  {MAX,
+   {0, 4, 8, 9},
+   {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0},
+   {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}},
+  {L1,
+   {0, 4, 8, 9},
+   {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0},
+   {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}},
+};
+const std::vector<CSRRowNormalizeInputs<double, int>> csrnormalize_inputs_d = {
+  {MAX,
+   {0, 4, 8, 9},
+   {5.0, 1.0, 0.0, 0.0, 10.0, 1.0, 0.0, 0.0, 1.0, 0.0},
+   {1.0, 0.2, 0.0, 0.0, 1.0, 0.1, 0.0, 0.0, 1, 0.0}},
+  {L1,
+   {0, 4, 8, 9},
+   {1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0},
+   {0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1, 0.0}},
+};
+
+INSTANTIATE_TEST_CASE_P(SparseNormTest,
+                        CSRRowNormalizeTestF,
+                        ::testing::ValuesIn(csrnormalize_inputs_f));
+INSTANTIATE_TEST_CASE_P(SparseNormTest,
+                        CSRRowNormalizeTestD,
+                        ::testing::ValuesIn(csrnormalize_inputs_d));
+
+}  // namespace sparse
+}  // namespace raft

From b00d2f7beddb40fb18e4116a8c12eac0e6d578bc Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Tue, 25 Apr 2023 12:35:35 -0400
Subject: [PATCH 28/78] Dropping Python 3.8 (#1454)

This PR drops Python 3.8 in favor of 3.9.

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1454
---
 .github/workflows/build.yaml    | 16 ++++++++--------
 .github/workflows/pr.yaml       | 22 +++++++++++-----------
 .github/workflows/test.yaml     |  8 ++++----
 dependencies.yaml               |  6 +-----
 pyproject.toml                  |  2 +-
 python/pylibraft/pyproject.toml |  4 ++--
 python/raft-dask/pyproject.toml |  4 ++--
 7 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 0f5f84c158..aea876d89c 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@py-39
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@py-39
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -57,7 +57,7 @@ jobs:
     if: github.ref_type == 'branch' && github.event_name == 'push'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39
     with:
       build_type: branch
       node_type: "gpu-v100-latest-1"
@@ -66,7 +66,7 @@ jobs:
       run_script: "ci/build_docs.sh"
   wheel-build-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
   wheel-publish-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@py-39
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-publish-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -100,7 +100,7 @@ jobs:
   wheel-publish-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@py-39
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c51d5c0a34..bc4ae5891c 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -23,41 +23,41 @@ jobs:
       - wheel-build-raft-dask
       - wheel-tests-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@py-39
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@py-39
     with:
       enable_check_generated_files: false
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@py-39
     with:
       build_type: pull-request
       node_type: cpu16
   conda-cpp-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@py-39
     with:
       build_type: pull-request
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39
     with:
       build_type: pull-request
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -67,7 +67,7 @@ jobs:
   wheel-build-pylibraft:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -76,7 +76,7 @@ jobs:
   wheel-tests-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-tests-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
     with:
       build_type: pull-request
       package-name: raft_dask
@@ -98,7 +98,7 @@ jobs:
   wheel-tests-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
     with:
       build_type: pull-request
       package-name: raft_dask
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 05e96a6dff..d8add3af5a 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@py-39
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -32,7 +32,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -44,7 +44,7 @@ jobs:
       test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
   wheel-tests-raft-dask:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/dependencies.yaml b/dependencies.yaml
index 1bc50d5dd4..bc0fbd409e 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -239,10 +239,6 @@ dependencies:
     specific:
       - output_types: conda
         matrices:
-          - matrix:
-              py: "3.8"
-            packages:
-              - python=3.8
           - matrix:
               py: "3.9"
             packages:
@@ -253,7 +249,7 @@ dependencies:
               - python=3.10
           - matrix:
             packages:
-              - python>=3.8,<3.11
+              - python>=3.9,<3.11
   run_pylibraft:
     common:
       - output_types: [conda, pyproject]
diff --git a/pyproject.toml b/pyproject.toml
index f8b09bed89..2982db2a23 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.black]
 line-length = 79
-target-version = ["py38"]
+target-version = ["py39"]
 include = '\.py?$'
 force-exclude = '''
 /(
diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml
index 4fe0a52ce6..0fb311ae3b 100644
--- a/python/pylibraft/pyproject.toml
+++ b/python/pylibraft/pyproject.toml
@@ -35,7 +35,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
     "cuda-python >=11.7.1,<12.0",
     "numpy>=1.21",
@@ -44,7 +44,7 @@ dependencies = [
 classifiers = [
     "Intended Audience :: Developers",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
 ]
 
diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml
index d7095aa00c..602148f758 100644
--- a/python/raft-dask/pyproject.toml
+++ b/python/raft-dask/pyproject.toml
@@ -32,7 +32,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
     "dask-cuda==23.6.*",
     "dask==2023.3.2",
@@ -46,7 +46,7 @@ dependencies = [
 classifiers = [
     "Intended Audience :: Developers",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
 ]
 

From 1866120665da322ba3867c50fa0bbfd843a1b1e2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 25 Apr 2023 14:52:41 -0700
Subject: [PATCH 29/78] Use pin_compatible to ensure that lower CTKs can be
 used (#1462)

The runtime requirement for the CTK is weaker than the build-time requirement: RAFT can be installed into environments with an older CTK (any 11.x) than the one it was built against (11.8).

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Divye Gala (https://github.com/divyegala)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/raft/pull/1462
---
 conda/recipes/libraft/meta.yaml                          | 7 ++++++-
 cpp/include/raft/distance/detail/kernels/gram_matrix.cuh | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml
index 8ec9cc10c6..83468d90af 100644
--- a/conda/recipes/libraft/meta.yaml
+++ b/conda/recipes/libraft/meta.yaml
@@ -52,6 +52,9 @@ outputs:
       host:
         - librmm ={{ minor_version }}
         - cudatoolkit {{ cuda_version }}
+      run:
+        - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
+        - librmm ={{ minor_version }}
     about:
       home: https://rapids.ai/
       license: Apache-2.0
@@ -68,7 +71,6 @@ outputs:
       run:
         - {{ pin_subpackage('libraft-headers-only', exact=True) }}
         - cuda-profiler-api {{ cuda_profiler_api_run_version }}
-        - cudatoolkit {{ cuda_version }}
         - librmm ={{ minor_version }}
         - libcublas {{ libcublas_run_version }}
         - libcublas-dev {{ libcublas_run_version }}
@@ -101,6 +103,7 @@ outputs:
         - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libraft-headers', exact=True) }}
+        - cudatoolkit {{ cuda_version }}
         - cuda-profiler-api {{ cuda_profiler_api_host_version }}
         - libcublas {{ libcublas_host_version }}
         - libcublas-dev {{ libcublas_host_version }}
@@ -135,6 +138,7 @@ outputs:
         - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libraft', exact=True) }}
+        - cudatoolkit {{ cuda_version }}
         - cuda-profiler-api {{ cuda_profiler_api_host_version }}
         - gmock {{ gtest_version }}
         - gtest {{ gtest_version }}
@@ -200,6 +204,7 @@ outputs:
         - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libraft', exact=True) }}
+        - cudatoolkit {{ cuda_version }}
         - libcublas {{ libcublas_host_version }}
         - libcublas-dev {{ libcublas_host_version }}
         - glog {{ glog_version }}
diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
index a68b904470..2154aa560c 100644
--- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
+++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
@@ -20,7 +20,7 @@
 #include <raft/core/device_resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
-//#include <raft/sparse/detail/cusparse_wrappers.h>
+// #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/distance/distance.cuh>
 #include <raft/sparse/linalg/spmm.cuh>
 

From de19c178e0ec283122a0a61328f564a4dc236357 Mon Sep 17 00:00:00 2001
From: Allard Hendriksen <ahendriksen@nvidia.com>
Date: Wed, 26 Apr 2023 01:13:04 +0200
Subject: [PATCH 30/78] Enable building with clang (limit strict error checking
 to GCC) (#1452)

Compiling RAFT with a non-GCC compiler can be tricky because error checking is very strict (`-Werror=all-warnings`, for instance), so the build is almost guaranteed to fail on warnings that are elevated to errors.

I am sometimes building RAFT with clang because of its `-ftime-trace` feature, which helps find code that increases compile times.

This PR contains the changes I typically make to the CMakeLists.txt to enable clang compilation. With this change, strict error checking is only performed when the host compiler is GCC.

Tested with clang version 11.1.

Authors:
  - Allard Hendriksen (https://github.com/ahendriksen)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1452
---
 cpp/cmake/modules/ConfigureCUDA.cmake | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
index c733d46985..ea8a077b0c 100644
--- a/cpp/cmake/modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -17,8 +17,16 @@ if(DISABLE_DEPRECATION_WARNINGS)
   list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations)
 endif()
 
+# Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with
+# clang)
 if(CMAKE_COMPILER_IS_GNUCXX)
   list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
+  list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
+
+  # set warnings as errors
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
+    list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings)
+  endif()
 endif()
 
 if(CUDA_LOG_COMPILE_TIME)
@@ -31,12 +39,6 @@ list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM")
 # make sure we produce smallest binary size
 list(APPEND RAFT_CUDA_FLAGS -Xfatbin=-compress-all)
 
-# set warnings as errors
-if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
-  list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings)
-endif()
-list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
-
 # Option to enable line info in CUDA device compilation to allow introspection when profiling /
 # memchecking
 if(CUDA_ENABLE_LINEINFO)

From a86ef51f6253fc5a875a0270bce65ecdb401130f Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Wed, 26 Apr 2023 16:51:33 -0400
Subject: [PATCH 31/78] Remove include statements from inside namespace (#1467)

Including the headers below inside the `raft::common::nvtx::detail` namespace breaks downstream users that need the global symbols those headers provide. In the case I ran into directly, the `dlopen` and `dlclose` functions became unusable.
```
#include <cstdint>
#include <cstdlib>
#include <mutex>
#include <nvtx3/nvToolsExt.h>
#include <string>
#include <type_traits>
#include <unordered_map>
```

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1467
---
 cpp/include/raft/core/detail/nvtx.hpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp
index ca4c5e4a08..e734c99029 100644
--- a/cpp/include/raft/core/detail/nvtx.hpp
+++ b/cpp/include/raft/core/detail/nvtx.hpp
@@ -18,8 +18,6 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
-namespace raft::common::nvtx::detail {
-
 #ifdef NVTX_ENABLED
 
 #include <cstdint>
@@ -30,6 +28,8 @@ namespace raft::common::nvtx::detail {
 #include <type_traits>
 #include <unordered_map>
 
+namespace raft::common::nvtx::detail {
+
 /**
  * @brief An internal struct to store associated state with the color
  * generator
@@ -191,8 +191,12 @@ inline void pop_range()
   nvtxDomainRangePop(domain_store<Domain>::value());
 }
 
+}  // namespace raft::common::nvtx::detail
+
 #else   // NVTX_ENABLED
 
+namespace raft::common::nvtx::detail {
+
 template <typename Domain, typename... Args>
 inline void push_range(const char* format, Args... args)
 {
@@ -203,6 +207,6 @@ inline void pop_range()
 {
 }
 
-#endif  // NVTX_ENABLED
-
 }  // namespace raft::common::nvtx::detail
+
+#endif  // NVTX_ENABLED

From 082be6ecd4437d180bf34d5ba5d691a27b21141f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Mart=C3=ADnez?=
 <26169771+miguelusque@users.noreply.github.com>
Date: Thu, 27 Apr 2023 01:50:40 +0200
Subject: [PATCH 32/78] Add RAPIDS cuDF as a library that supports
 cuda_array_interface (#1444)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding a mention and an example of RAPIDS cuDF compatibility with cuda_array_interface.

Authors:
  - Miguel Martínez (https://github.com/miguelusque)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1444
---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b77e906262..8de4c058dc 100755
--- a/README.md
+++ b/README.md
@@ -146,7 +146,7 @@ in2 = cp.random.random_sample((n_samples, n_features), dtype=cp.float32)
 output = pairwise_distance(in1, in2, metric="euclidean")
 ```
 
-The `output` array in the above example is of type `raft.common.device_ndarray`, which supports [__cuda_array_interface__](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html#cuda-array-interface-version-2) making it interoperable with other libraries like CuPy, Numba, and PyTorch that also support it. CuPy supports DLPack, which also enables zero-copy conversion from `raft.common.device_ndarray` to JAX and Tensorflow.
+The `output` array in the above example is of type `raft.common.device_ndarray`, which supports [__cuda_array_interface__](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html#cuda-array-interface-version-2), making it interoperable with other libraries like CuPy, Numba, PyTorch, and RAPIDS cuDF that also support it. CuPy supports DLPack, which also enables zero-copy conversion from `raft.common.device_ndarray` to JAX and TensorFlow.
 
 Below is an example of converting the output `pylibraft.device_ndarray` to a CuPy array:
 ```python
@@ -160,6 +160,11 @@ import torch
 torch_tensor = torch.as_tensor(output, device='cuda')
 ```
 
+Or converting to a RAPIDS cuDF dataframe:
+```python
+cudf_dataframe = cudf.DataFrame(output)
+```
+
 When the corresponding library has been installed and available in your environment, this conversion can also be done automatically by all RAFT compute APIs by setting a global configuration option:
 ```python
 import pylibraft.config

From fbce1a43b60508d59215faf03222ebde00e5bbfe Mon Sep 17 00:00:00 2001
From: Allard Hendriksen <ahendriksen@nvidia.com>
Date: Fri, 28 Apr 2023 04:23:20 +0200
Subject: [PATCH 33/78] [ENH] [FINAL] Header structure: combine all PRs into
 one (#1469)

This is a rebase of all the commits in PRs:
- #1437
- #1438
- #1439
- #1440
- #1441

The original PRs were deliberately left un-rebased so that their review comments are preserved. This PR is up to date with `branch-23.06`.

Closes #1416

Authors:
  - Allard Hendriksen (https://github.com/ahendriksen)

Approvers:
  - Divye Gala (https://github.com/divyegala)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1469
---
 README.md                                     |   14 +-
 cpp/CMakeLists.txt                            |  306 ++--
 cpp/bench/ann/src/raft/raft_benchmark.cu      |    4 -
 cpp/bench/ann/src/raft/raft_ivf_flat.cu       |    6 +-
 .../ann/src/raft/raft_ivf_flat_wrapper.h      |    1 +
 cpp/bench/ann/src/raft/raft_ivf_pq.cu         |    4 -
 cpp/bench/prims/CMakeLists.txt                |   12 +-
 cpp/bench/prims/cluster/kmeans.cu             |    4 -
 cpp/bench/prims/cluster/kmeans_balanced.cu    |    4 -
 cpp/bench/prims/distance/distance_common.cuh  |    3 -
 cpp/bench/prims/distance/fused_l2_nn.cu       |    3 -
 cpp/bench/prims/distance/kernels.cu           |    4 -
 cpp/bench/prims/distance/masked_nn.cu         |    4 -
 cpp/bench/prims/matrix/select_k.cu            |    4 -
 cpp/bench/prims/neighbors/knn.cuh             |    4 -
 .../prims/neighbors/refine_float_int64_t.cu   |    5 -
 .../prims/neighbors/refine_uint8_t_int64_t.cu |    4 -
 cpp/doxygen/Doxyfile                          |    1 +
 .../raft/cluster/detail/kmeans_common.cuh     |    1 +
 cpp/include/raft/cluster/specializations.cuh  |   12 +-
 cpp/include/raft/core/detail/macros.hpp       |   36 +-
 cpp/include/raft/core/logger-ext.hpp          |  128 ++
 cpp/include/raft/core/logger-inl.hpp          |  155 ++
 cpp/include/raft/core/logger-macros.hpp       |  106 ++
 cpp/include/raft/core/logger.hpp              |  310 +---
 cpp/include/raft/core/mdarray.hpp             |    1 +
 .../core/resource/device_memory_resource.hpp  |    3 +-
 cpp/include/raft/core/resources.hpp           |    3 +-
 .../detail/kernels/kernel_matrices.cuh        |    5 +-
 .../distance/detail/kernels/rbf_fin_op.cuh    |   51 +
 .../detail/pairwise_matrix/dispatch-ext.cuh   |  194 +++
 .../detail/pairwise_matrix/dispatch-inl.cuh   |  130 ++
 .../detail/pairwise_matrix/dispatch.cuh       |  125 +-
 cpp/include/raft/distance/distance-ext.cuh    | 1065 ++++++++++++++
 cpp/include/raft/distance/distance-inl.cuh    |  477 ++++++
 cpp/include/raft/distance/distance.cuh        |  468 +-----
 cpp/include/raft/distance/fused_l2_nn-ext.cuh |   82 ++
 cpp/include/raft/distance/fused_l2_nn-inl.cuh |  206 +++
 cpp/include/raft/distance/fused_l2_nn.cuh     |  216 +--
 .../raft/distance/fused_l2_nn_helpers.cuh     |   49 +
 cpp/include/raft/distance/specializations.cuh |   14 +-
 .../detail/00_write_template.py               |  148 --
 .../specializations/detail/canberra.cuh       |   40 -
 .../specializations/detail/correlation.cuh    |   40 -
 .../specializations/detail/cosine.cuh         |   40 -
 .../detail/hamming_unexpanded.cuh             |   40 -
 .../detail/hellinger_expanded.cuh             |   40 -
 .../specializations/detail/inner_product.cuh  |   52 -
 .../specializations/detail/jensen_shannon.cuh |   40 -
 .../specializations/detail/kernels.cuh        |   31 -
 .../specializations/detail/kl_divergence.cuh  |   40 -
 .../distance/specializations/detail/l1.cuh    |   40 -
 .../specializations/detail/l2_expanded.cuh    |   40 -
 .../specializations/detail/l2_unexpanded.cuh  |   40 -
 .../distance/specializations/detail/l_inf.cuh |   40 -
 .../specializations/detail/lp_unexpanded.cuh  |   40 -
 .../specializations/detail/russel_rao.cuh     |   40 -
 .../distance/specializations/distance.cuh     |   22 +-
 .../specializations/fused_l2_nn_min.cuh       |  117 +-
 .../linalg/detail/coalesced_reduction-ext.cuh |   73 +
 .../linalg/detail/coalesced_reduction-inl.cuh |  368 +++++
 .../linalg/detail/coalesced_reduction.cuh     |  358 +----
 .../raft/matrix/detail/select_k-ext.cuh       |   65 +
 .../raft/matrix/detail/select_k-inl.cuh       |   91 ++
 cpp/include/raft/matrix/detail/select_k.cuh   |   79 +-
 .../raft/matrix/detail/select_warpsort.cuh    |    2 +-
 cpp/include/raft/matrix/specializations.cuh   |    7 +-
 .../specializations/detail/select_k.cuh       |   35 +-
 cpp/include/raft/neighbors/ball_cover-ext.cuh |  124 ++
 cpp/include/raft/neighbors/ball_cover-inl.cuh |  395 +++++
 cpp/include/raft/neighbors/ball_cover.cuh     |  381 +----
 .../raft/neighbors/brute_force-ext.cuh        |  109 ++
 .../raft/neighbors/brute_force-inl.cuh        |  280 ++++
 cpp/include/raft/neighbors/brute_force.cuh    |  268 +---
 .../detail/ivf_flat_interleaved_scan-ext.cuh  |   65 +
 .../detail/ivf_flat_interleaved_scan-inl.cuh  | 1076 ++++++++++++++
 .../detail/ivf_flat_interleaved_scan.cuh}     |   11 +-
 .../neighbors/detail/ivf_flat_search-ext.cuh  |   58 +
 .../neighbors/detail/ivf_flat_search-inl.cuh  |  234 +++
 .../raft/neighbors/detail/ivf_flat_search.cuh | 1280 +----------------
 .../neighbors/detail/ivf_flat_serialize.cuh   |    1 +
 .../detail/ivf_pq_compute_similarity-ext.cuh  |  185 +++
 .../detail/ivf_pq_compute_similarity-inl.cuh  |  845 +++++++++++
 .../detail/ivf_pq_compute_similarity.cuh}     |   11 +-
 .../detail/ivf_pq_dummy_block_sort.cuh        |   39 +
 .../raft/neighbors/detail/ivf_pq_fp_8bit.cuh  |  113 ++
 .../raft/neighbors/detail/ivf_pq_search.cuh   |  907 +-----------
 .../raft/neighbors/detail/knn_brute_force.cuh |    3 +-
 cpp/include/raft/neighbors/detail/refine.cuh  |   11 +-
 .../neighbors/detail/selection_faiss-ext.cuh  |   61 +
 .../neighbors/detail/selection_faiss-inl.cuh  |  163 +++
 .../raft/neighbors/detail/selection_faiss.cuh |  157 +-
 .../detail/selection_faiss_helpers.cuh}       |   22 +-
 cpp/include/raft/neighbors/ivf_flat-ext.cuh   |  185 +++
 cpp/include/raft/neighbors/ivf_flat-inl.cuh   |  469 ++++++
 cpp/include/raft/neighbors/ivf_flat.cuh       |  459 +-----
 cpp/include/raft/neighbors/ivf_flat_types.hpp |   10 +-
 cpp/include/raft/neighbors/ivf_pq-ext.cuh     |  170 +++
 cpp/include/raft/neighbors/ivf_pq-inl.cuh     |  355 +++++
 cpp/include/raft/neighbors/ivf_pq.cuh         |  343 +----
 cpp/include/raft/neighbors/refine-ext.cuh     |   78 +
 cpp/include/raft/neighbors/refine-inl.cuh     |  105 ++
 cpp/include/raft/neighbors/refine.cuh         |   93 +-
 .../raft/neighbors/specializations.cuh        |   17 +-
 .../neighbors/specializations/ball_cover.cuh  |   41 +-
 .../neighbors/specializations/brute_force.cuh |   34 +-
 .../detail/ivf_pq_compute_similarity.cuh      |   38 +-
 .../specializations/fused_l2_knn.cuh          |   72 +-
 .../neighbors/specializations/ivf_flat.cuh    |   65 +-
 .../raft/neighbors/specializations/ivf_pq.cuh |   63 +-
 .../raft/neighbors/specializations/refine.cuh |   39 +-
 .../raft/sparse/neighbors/specializations.cuh |   10 +-
 .../raft/spatial/knn/detail/ann_utils.cuh     |    1 -
 .../spatial/knn/detail/ball_cover/common.cuh  |   39 +-
 .../knn/detail/ball_cover/registers-ext.cuh   |  129 ++
 .../knn/detail/ball_cover/registers-inl.cuh   |  780 ++++++++++
 .../knn/detail/ball_cover/registers.cuh       |  767 +---------
 .../knn/detail/ball_cover/registers_types.cuh |   66 +
 .../spatial/knn/detail/fused_l2_knn-ext.cuh   |   70 +
 .../spatial/knn/detail/fused_l2_knn-inl.cuh   | 1040 ++++++++++++++
 .../raft/spatial/knn/detail/fused_l2_knn.cuh  | 1028 +------------
 .../raft/spatial/knn/specializations.cuh      |    9 +-
 .../raft/spatial/knn/specializations/knn.cuh  |   31 +-
 cpp/include/raft/spectral/specializations.cuh |   12 +-
 cpp/include/raft/stats/specializations.cuh    |   12 +-
 cpp/include/raft/util/cudart_utils.hpp        |   52 +-
 cpp/include/raft/util/detail/cub_wrappers.cuh |    4 +-
 .../raft/util/memory_pool-ext.hpp}            |   17 +-
 cpp/include/raft/util/memory_pool-inl.hpp     |   76 +
 .../raft/util/memory_pool.hpp}                |   11 +-
 cpp/include/raft/util/raft_explicit.hpp       |   88 ++
 .../raft_internal/matrix/select_k.cuh         |    7 +-
 .../raft_internal/neighbors/naive_knn.cuh     |    4 -
 .../logger.cpp}                               |    8 +-
 .../pairwise_matrix/dispatch_00_generate.py   |  194 +++
 ...patch_canberra_double_double_double_int.cu |   55 +
 ...dispatch_canberra_float_float_float_int.cu |   50 +
 ...ch_correlation_double_double_double_int.cu |   55 +
 ...patch_correlation_float_float_float_int.cu |   55 +
 ...ispatch_cosine_double_double_double_int.cu |   51 +
 .../dispatch_cosine_float_float_float_int.cu  |   51 +
 ...ing_unexpanded_double_double_double_int.cu |   50 +
 ...amming_unexpanded_float_float_float_int.cu |   50 +
 ...inger_expanded_double_double_double_int.cu |   55 +
 ...ellinger_expanded_float_float_float_int.cu |   50 +
 ...jensen_shannon_double_double_double_int.cu |   55 +
 ...ch_jensen_shannon_float_float_float_int.cu |   55 +
 ..._kl_divergence_double_double_double_int.cu |   50 +
 ...tch_kl_divergence_float_float_float_int.cu |   50 +
 .../dispatch_l1_double_double_double_int.cu   |   50 +
 .../dispatch_l1_float_float_float_int.cu      |   50 +
 ...ch_l2_expanded_double_double_double_int.cu |   51 +
 ...patch_l2_expanded_float_float_float_int.cu |   51 +
 ..._l2_unexpanded_double_double_double_int.cu |   55 +
 ...tch_l2_unexpanded_float_float_float_int.cu |   50 +
 ...dispatch_l_inf_double_double_double_int.cu |   50 +
 .../dispatch_l_inf_float_float_float_int.cu   |   50 +
 ..._lp_unexpanded_double_double_double_int.cu |   55 +
 ...tch_lp_unexpanded_float_float_float_int.cu |   50 +
 .../detail/pairwise_matrix/dispatch_rbf.cu    |   64 +
 ...tch_russel_rao_double_double_double_int.cu |   55 +
 ...spatch_russel_rao_float_float_float_int.cu |   50 +
 cpp/src/distance/distance.cu                  |  934 ++++++++++++
 cpp/src/distance/fused_l2_nn.cu               |   54 +
 .../detail/00_write_template.py               |  159 --
 .../canberra_double_double_double_int.cu      |   33 -
 .../detail/canberra_float_float_float_int.cu  |   33 -
 .../correlation_double_double_double_int.cu   |   33 -
 .../correlation_float_float_float_int.cu      |   33 -
 .../detail/cosine_double_double_double_int.cu |   34 -
 .../detail/cosine_float_float_float_int.cu    |   34 -
 ...ing_unexpanded_double_double_double_int.cu |   33 -
 ...amming_unexpanded_float_float_float_int.cu |   33 -
 ...inger_expanded_double_double_double_int.cu |   33 -
 ...ellinger_expanded_float_float_float_int.cu |   33 -
 .../inner_product_double_double_double_int.cu |   38 -
 .../inner_product_float_float_float_int.cu    |   37 -
 ...jensen_shannon_double_double_double_int.cu |   34 -
 .../jensen_shannon_float_float_float_int.cu   |   34 -
 .../detail/kernels/tanh_kernel_double.cu      |   20 -
 .../kl_divergence_double_double_double_int.cu |   33 -
 .../kl_divergence_float_float_float_int.cu    |   33 -
 .../detail/l1_double_double_double_int.cu     |   33 -
 .../detail/l1_float_float_float_int.cu        |   33 -
 .../l2_expanded_double_double_double_int.cu   |   34 -
 .../l2_expanded_float_float_float_int.cu      |   34 -
 .../l2_unexpanded_double_double_double_int.cu |   33 -
 .../l2_unexpanded_float_float_float_int.cu    |   33 -
 .../detail/l_inf_double_double_double_int.cu  |   33 -
 .../detail/l_inf_float_float_float_int.cu     |   33 -
 .../lp_unexpanded_double_double_double_int.cu |   33 -
 .../lp_unexpanded_float_float_float_int.cu    |   33 -
 .../russel_rao_double_double_double_int.cu    |   33 -
 .../russel_rao_float_float_float_int.cu       |   33 -
 .../specializations/fused_l2_nn_double_int.cu |   51 -
 .../fused_l2_nn_double_int64.cu               |   51 -
 .../specializations/fused_l2_nn_float_int.cu  |   51 -
 .../fused_l2_nn_float_int64.cu                |   51 -
 cpp/src/linalg/detail/coalesced_reduction.cu  |   69 +
 .../matrix/detail/select_k_double_int64_t.cu  |   33 +
 .../matrix/detail/select_k_double_uint32_t.cu |   34 +
 .../matrix/detail/select_k_float_int64_t.cu   |   33 +
 .../matrix/detail/select_k_float_uint32_t.cu  |   33 +
 .../matrix/detail/select_k_half_int64_t.cu    |   33 +
 .../matrix/detail/select_k_half_uint32_t.cu   |   33 +
 .../detail/select_k_float_int64_t.cu          |   36 -
 .../detail/select_k_float_uint32_t.cu         |   36 -
 .../detail/select_k_half_int64_t.cu           |   36 -
 .../detail/select_k_half_uint32_t.cu          |   36 -
 cpp/src/neighbors/ball_cover.cu               |   66 +
 cpp/src/neighbors/brute_force_00_generate.py  |  106 ++
 .../brute_force_fused_l2_knn_float_int64_t.cu |   45 +
 .../brute_force_knn_int64_t_float_int64_t.cu  |   47 +
 .../brute_force_knn_int64_t_float_uint32_t.cu |   47 +
 .../brute_force_knn_int_float_int.cu          |   47 +
 ...brute_force_knn_uint32_t_float_uint32_t.cu |   47 +
 ...at_interleaved_scan_float_float_int64_t.cu |   36 +
 ...interleaved_scan_int8_t_int32_t_int64_t.cu |   36 +
 ...terleaved_scan_uint8_t_uint32_t_int64_t.cu |   36 +
 cpp/src/neighbors/detail/ivf_flat_search.cu   |   35 +
 .../ivf_pq_compute_similarity_00_generate.py  |  107 ++
 .../ivf_pq_compute_similarity_float_float.cu  |   73 +
 ...f_pq_compute_similarity_float_fp8_false.cu |   74 +
 ...vf_pq_compute_similarity_float_fp8_true.cu |   74 +
 .../ivf_pq_compute_similarity_float_half.cu   |   73 +
 ...vf_pq_compute_similarity_half_fp8_false.cu |   74 +
 ...ivf_pq_compute_similarity_half_fp8_true.cu |   74 +
 .../ivf_pq_compute_similarity_half_half.cu    |   73 +
 .../detail/selection_faiss_00_generate.py     |   75 +
 .../detail/selection_faiss_int32_t_float.cu   |   44 +
 .../detail/selection_faiss_int_double.cu      |   44 +
 .../detail/selection_faiss_long_float.cu      |   44 +
 .../detail/selection_faiss_size_t_double.cu   |   44 +
 .../detail/selection_faiss_size_t_float.cu    |   44 +
 .../detail/selection_faiss_uint32_t_float.cu  |   44 +
 cpp/src/neighbors/ivf_flat_00_generate.py     |  148 ++
 .../neighbors/ivf_flat_build_float_int64_t.cu |   50 +
 .../ivf_flat_build_int8_t_int64_t.cu          |   50 +
 .../ivf_flat_build_uint8_t_int64_t.cu         |   50 +
 .../ivf_flat_extend_float_int64_t.cu          |   58 +
 .../ivf_flat_extend_int8_t_int64_t.cu         |   58 +
 .../ivf_flat_extend_uint8_t_int64_t.cu        |   58 +
 .../ivf_flat_search_float_int64_t.cu          |   49 +
 .../ivf_flat_search_int8_t_int64_t.cu         |   49 +
 .../ivf_flat_search_uint8_t_int64_t.cu        |   49 +
 .../neighbors/ivfpq_build_float_int64_t.cu    |   36 +
 .../neighbors/ivfpq_build_int8_t_int64_t.cu   |   36 +
 .../neighbors/ivfpq_build_uint8_t_int64_t.cu  |   36 +
 .../neighbors/ivfpq_extend_float_int64_t.cu   |   50 +
 .../neighbors/ivfpq_extend_int8_t_int64_t.cu  |   50 +
 .../neighbors/ivfpq_extend_uint8_t_int64_t.cu |   50 +
 .../neighbors/ivfpq_search_float_int64_t.cu   |   43 +-
 .../neighbors/ivfpq_search_int8_t_int64_t.cu  |   43 +-
 .../neighbors/ivfpq_search_uint8_t_int64_t.cu |   43 +-
 cpp/src/neighbors/refine_00_generate.py       |   78 +
 cpp/src/neighbors/refine_float_float.cu       |   50 +
 cpp/src/neighbors/refine_int8_t_float.cu      |   50 +
 cpp/src/neighbors/refine_uint8_t_float.cu     |   50 +
 .../ball_cover_all_knn_query.cu               |   33 -
 .../specializations/ball_cover_build_index.cu |   31 -
 .../specializations/ball_cover_knn_query.cu   |   34 -
 .../detail/ball_cover_lowdim_pass_one_2d.cu   |   43 -
 .../detail/ball_cover_lowdim_pass_one_3d.cu   |   43 -
 .../detail/ball_cover_lowdim_pass_two_2d.cu   |   41 -
 .../detail/ball_cover_lowdim_pass_two_3d.cu   |   42 -
 .../brute_force_knn_impl_long_float_int.cu    |   39 -
 .../brute_force_knn_impl_long_float_uint.cu   |   39 -
 .../brute_force_knn_impl_uint_float_int.cu    |   39 -
 .../brute_force_knn_impl_uint_float_uint.cu   |   39 -
 ...pute_similarity_float_float_no_smem_lut.cu |   27 -
 .../compute_similarity_float_fp8s_fast.cu     |   27 -
 ...mpute_similarity_float_fp8s_no_basediff.cu |   28 -
 ...mpute_similarity_float_fp8s_no_smem_lut.cu |   28 -
 .../compute_similarity_float_fp8u_fast.cu     |   28 -
 ...mpute_similarity_float_fp8u_no_basediff.cu |   28 -
 ...mpute_similarity_float_fp8u_no_smem_lut.cu |   28 -
 .../compute_similarity_float_half_fast.cu     |   27 -
 ...mpute_similarity_float_half_no_basediff.cu |   27 -
 ...mpute_similarity_float_half_no_smem_lut.cu |   27 -
 .../compute_similarity_half_fp8s_fast.cu      |   27 -
 ...ompute_similarity_half_fp8s_no_basediff.cu |   27 -
 ...ompute_similarity_half_fp8s_no_smem_lut.cu |   27 -
 .../compute_similarity_half_fp8u_fast.cu      |   27 -
 ...ompute_similarity_half_fp8u_no_basediff.cu |   28 -
 ...ompute_similarity_half_fp8u_no_smem_lut.cu |   28 -
 .../compute_similarity_half_half_fast.cu      |   27 -
 ...ompute_similarity_half_half_no_basediff.cu |   27 -
 ...ompute_similarity_half_half_no_smem_lut.cu |   27 -
 ...mpute_similarity_float_half_no_smem_lut.cu |   27 -
 .../fused_l2_knn_int_float_false.cu           |   42 -
 .../fused_l2_knn_int_float_true.cu            |   41 -
 .../fused_l2_knn_long_float_false.cu          |   41 -
 .../fused_l2_knn_long_float_true.cu           |   41 -
 .../ivfflat_build_float_int64_t.cu            |   31 -
 .../ivfflat_build_int8_t_int64_t.cu           |   31 -
 .../ivfflat_build_uint8_t_int64_t.cu          |   31 -
 .../ivfflat_extend_float_int64_t.cu           |   37 -
 .../ivfflat_extend_int8_t_int64_t.cu          |   37 -
 .../ivfflat_extend_uint8_t_int64_t.cu         |   37 -
 .../ivfflat_search_float_int64_t.cu           |   58 -
 .../ivfflat_search_int8_t_int64_t.cu          |   49 -
 .../ivfflat_search_uint8_t_int64_t.cu         |   49 -
 .../ivfpq_build_float_int64_t.cu              |   32 -
 .../ivfpq_build_int8_t_int64_t.cu             |   32 -
 .../ivfpq_build_uint8_t_int64_t.cu            |   32 -
 .../ivfpq_extend_float_int64_t.cu             |   39 -
 .../ivfpq_extend_int8_t_int64_t.cu            |   39 -
 .../ivfpq_extend_uint8_t_int64_t.cu           |   39 -
 .../ivfpq_search_float_int64_t.cu             |   34 -
 .../ivfpq_search_int8_t_int64_t.cu            |   34 -
 .../ivfpq_search_uint8_t_int64_t.cu           |   34 -
 .../specializations/refine_d_int64_t_float.cu |   31 -
 .../refine_d_int64_t_int8_t.cu                |   31 -
 .../refine_d_int64_t_uint8_t.cu               |   31 -
 .../specializations/refine_h_int64_t_float.cu |   31 -
 .../refine_h_int64_t_int8_t.cu                |   30 -
 .../refine_h_int64_t_uint8_t.cu               |   31 -
 .../brute_force_knn_long_float_int.cu         |   42 -
 .../brute_force_knn_long_float_uint.cu        |   42 -
 .../brute_force_knn_uint32_t_float_int.cu     |   41 -
 .../brute_force_knn_uint32_t_float_uint.cu    |   42 -
 .../cluster/cluster_cost.cuh                  |    0
 .../cluster/cluster_cost_double.cu            |    1 -
 .../cluster/cluster_cost_float.cu             |    1 -
 .../cluster/kmeans_fit_double.cu              |    1 -
 .../cluster/kmeans_fit_float.cu               |    1 -
 .../cluster/kmeans_init_plus_plus_double.cu   |    1 -
 .../cluster/kmeans_init_plus_plus_float.cu    |    1 -
 .../cluster/update_centroids.cuh              |    1 -
 .../cluster/update_centroids_double.cu        |    1 -
 .../cluster/update_centroids_float.cu         |    1 -
 .../distance/fused_l2_min_arg.cu              |    4 +-
 .../distance/pairwise_distance.cu             |    1 -
 .../matrix/select_k_float_int64_t.cu          |    1 -
 .../brute_force_knn_int64_t_float.cu          |    2 -
 .../neighbors/ivf_flat_build.cu               |    2 +-
 .../neighbors/ivf_flat_search.cu              |    2 +-
 .../neighbors/ivfpq_build.cu                  |    1 -
 .../neighbors/ivfpq_deserialize.cu            |    2 +-
 .../neighbors/ivfpq_search_float_int64_t.cu   |   38 +
 .../neighbors/ivfpq_search_int8_t_int64_t.cu  |   38 +
 .../neighbors/ivfpq_search_uint8_t_int64_t.cu |   38 +
 .../neighbors/ivfpq_serialize.cu              |    2 +-
 .../neighbors/refine_d_int64_t_float.cu       |    1 -
 .../neighbors/refine_d_int64_t_int8_t.cu      |    1 -
 .../neighbors/refine_d_int64_t_uint8_t.cu     |    1 -
 .../neighbors/refine_h_int64_t_float.cu       |    1 -
 .../neighbors/refine_h_int64_t_int8_t.cu      |    1 -
 .../neighbors/refine_h_int64_t_uint8_t.cu     |    1 -
 cpp/src/{ => raft_runtime}/random/common.cuh  |    0
 ...rmat_rectangular_generator_int64_double.cu |    0
 .../rmat_rectangular_generator_int64_float.cu |    0
 .../rmat_rectangular_generator_int_double.cu  |    0
 .../rmat_rectangular_generator_int_float.cu   |    0
 .../knn/detail/ball_cover/registers.cu        |   60 +
 .../ball_cover/registers_00_generate.py       |  112 ++
 .../ball_cover/registers_pass_one_2d_dist.cu  |   48 +
 .../registers_pass_one_2d_euclidean.cu        |   48 +
 .../registers_pass_one_2d_haversine.cu        |   48 +
 .../ball_cover/registers_pass_one_3d_dist.cu  |   48 +
 .../registers_pass_one_3d_euclidean.cu        |   48 +
 .../registers_pass_one_3d_haversine.cu        |   48 +
 .../ball_cover/registers_pass_two_2d_dist.cu  |   48 +
 .../registers_pass_two_2d_euclidean.cu        |   48 +
 .../registers_pass_two_2d_haversine.cu        |   48 +
 .../ball_cover/registers_pass_two_3d_dist.cu  |   48 +
 .../registers_pass_two_3d_euclidean.cu        |   48 +
 .../registers_pass_two_3d_haversine.cu        |   48 +
 .../knn/detail/fused_l2_knn_int32_t_float.cu  |   40 +
 .../knn/detail/fused_l2_knn_int64_t_float.cu  |   40 +
 .../knn/detail/fused_l2_knn_uint32_t_float.cu |   41 +
 .../memory_pool.cpp}                          |    7 +-
 cpp/template/src/test_distance.cu             |    4 -
 cpp/test/CMakeLists.txt                       |   75 +-
 cpp/test/cluster/cluster_solvers.cu           |    4 -
 cpp/test/cluster/kmeans.cu                    |    4 -
 cpp/test/cluster/kmeans_balanced.cu           |    4 -
 cpp/test/cluster/kmeans_find_k.cu             |    4 -
 cpp/test/cluster/linkage.cu                   |   13 +-
 cpp/test/core/handle.cpp                      |    1 +
 cpp/test/distance/dist_adj.cu                 |   16 +-
 cpp/test/distance/dist_adj.cuh                |   71 +
 .../distance/dist_adj_distance_instance.cu    |   63 +
 cpp/test/distance/dist_adj_threshold.cuh      |   36 +
 cpp/test/distance/distance_base.cuh           |   27 +-
 cpp/test/distance/fused_l2_nn.cu              |    4 -
 cpp/test/distance/gram.cu                     |    4 -
 cpp/test/distance/masked_nn.cu                |    4 -
 cpp/test/ext_headers/00_generate.py           |   79 +
 .../ext_headers/raft_core_logger.cpp}         |   15 +-
 ...istance_detail_pairwise_matrix_dispatch.cu |   27 +
 .../ext_headers/raft_distance_distance.cu     |   27 +
 .../ext_headers/raft_distance_fused_l2_nn.cu  |   27 +
 .../raft_linalg_detail_coalesced_reduction.cu |   27 +
 .../raft_matrix_detail_select_k.cu            |   27 +
 .../ext_headers/raft_neighbors_ball_cover.cu  |   27 +
 .../ext_headers/raft_neighbors_brute_force.cu |   27 +
 ...ghbors_detail_ivf_flat_interleaved_scan.cu |   27 +
 .../raft_neighbors_detail_ivf_flat_search.cu  |   27 +
 ...ghbors_detail_ivf_pq_compute_similarity.cu |   27 +
 .../raft_neighbors_detail_selection_faiss.cu  |   27 +
 .../ext_headers/raft_neighbors_ivf_flat.cu    |   27 +
 cpp/test/ext_headers/raft_neighbors_ivf_pq.cu |   27 +
 cpp/test/ext_headers/raft_neighbors_refine.cu |   27 +
 ...spatial_knn_detail_ball_cover_registers.cu |   27 +
 .../raft_spatial_knn_detail_fused_l2_knn.cu}  |   15 +-
 .../ext_headers/raft_util_memory_pool.cpp     |   27 +
 cpp/test/linalg/eigen_solvers.cu              |    3 +-
 cpp/test/matrix/select_k.cu                   |   15 +-
 .../ann_cagra/test_float_uint32_t.cu          |    4 -
 cpp/test/neighbors/ann_ivf_flat.cuh           |    4 -
 .../ann_ivf_flat/test_float_int64_t.cu        |    4 -
 .../ann_ivf_flat/test_int8_t_int64_t.cu       |    4 -
 .../ann_ivf_flat/test_uint8_t_int64_t.cu      |    4 -
 cpp/test/neighbors/ann_ivf_pq.cuh             |    6 +-
 .../ann_ivf_pq/test_float_uint32_t.cu         |    7 +
 cpp/test/neighbors/ann_utils.cuh              |    1 +
 cpp/test/neighbors/ball_cover.cu              |    4 -
 cpp/test/neighbors/epsilon_neighborhood.cu    |    4 -
 cpp/test/neighbors/fused_l2_knn.cu            |   10 +-
 cpp/test/neighbors/knn.cu                     |    4 -
 cpp/test/neighbors/refine.cu                  |    4 -
 cpp/test/neighbors/selection.cu               |    5 +-
 cpp/test/neighbors/tiled_knn.cu               |    7 +-
 .../sparse/neighbors/connect_components.cu    |    9 +
 cpp/test/sparse/neighbors/knn_graph.cu        |    3 -
 cpp/test/stats/silhouette_score.cu            |    4 -
 cpp/test/stats/trustworthiness.cu             |    4 -
 docs/source/build.md                          |   12 +-
 docs/source/developer_guide.md                |   91 ++
 docs/source/using_libraft.md                  |   91 +-
 431 files changed, 18575 insertions(+), 12748 deletions(-)
 create mode 100644 cpp/include/raft/core/logger-ext.hpp
 create mode 100644 cpp/include/raft/core/logger-inl.hpp
 create mode 100644 cpp/include/raft/core/logger-macros.hpp
 create mode 100644 cpp/include/raft/distance/detail/kernels/rbf_fin_op.cuh
 create mode 100644 cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh
 create mode 100644 cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh
 create mode 100644 cpp/include/raft/distance/distance-ext.cuh
 create mode 100644 cpp/include/raft/distance/distance-inl.cuh
 create mode 100644 cpp/include/raft/distance/fused_l2_nn-ext.cuh
 create mode 100644 cpp/include/raft/distance/fused_l2_nn-inl.cuh
 create mode 100644 cpp/include/raft/distance/fused_l2_nn_helpers.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/00_write_template.py
 delete mode 100644 cpp/include/raft/distance/specializations/detail/canberra.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/correlation.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/cosine.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/hamming_unexpanded.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/hellinger_expanded.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/inner_product.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/jensen_shannon.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/kernels.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/kl_divergence.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/l1.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/l2_expanded.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/l2_unexpanded.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/l_inf.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/lp_unexpanded.cuh
 delete mode 100644 cpp/include/raft/distance/specializations/detail/russel_rao.cuh
 create mode 100644 cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh
 create mode 100644 cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh
 create mode 100644 cpp/include/raft/matrix/detail/select_k-ext.cuh
 create mode 100644 cpp/include/raft/matrix/detail/select_k-inl.cuh
 create mode 100644 cpp/include/raft/neighbors/ball_cover-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/ball_cover-inl.cuh
 create mode 100644 cpp/include/raft/neighbors/brute_force-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/brute_force-inl.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh
 rename cpp/{src/distance/specializations/detail/kernels/gram_matrix_base_double.cu => include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh} (76%)
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh
 rename cpp/{src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu => include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh} (76%)
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/detail/selection_faiss-inl.cuh
 rename cpp/{src/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu => include/raft/neighbors/detail/selection_faiss_helpers.cuh} (54%)
 create mode 100644 cpp/include/raft/neighbors/ivf_flat-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/ivf_flat-inl.cuh
 create mode 100644 cpp/include/raft/neighbors/ivf_pq-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/ivf_pq-inl.cuh
 create mode 100644 cpp/include/raft/neighbors/refine-ext.cuh
 create mode 100644 cpp/include/raft/neighbors/refine-inl.cuh
 create mode 100644 cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh
 create mode 100644 cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh
 create mode 100644 cpp/include/raft/spatial/knn/detail/ball_cover/registers_types.cuh
 create mode 100644 cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh
 create mode 100644 cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh
 rename cpp/{src/neighbors/specializations/detail/compute_similarity_float_float_fast.cu => include/raft/util/memory_pool-ext.hpp} (55%)
 create mode 100644 cpp/include/raft/util/memory_pool-inl.hpp
 rename cpp/{src/distance/specializations/detail/kernels/rbf_kernel_float.cu => include/raft/util/memory_pool.hpp} (72%)
 create mode 100644 cpp/include/raft/util/raft_explicit.hpp
 rename cpp/src/{distance/specializations/detail/kernels/gram_matrix_base_float.cu => core/logger.cpp} (71%)
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
 create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
 create mode 100644 cpp/src/distance/distance.cu
 create mode 100644 cpp/src/distance/fused_l2_nn.cu
 delete mode 100644 cpp/src/distance/specializations/detail/00_write_template.py
 delete mode 100644 cpp/src/distance/specializations/detail/canberra_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/canberra_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/correlation_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/correlation_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/cosine_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/cosine_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/hamming_unexpanded_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/hamming_unexpanded_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/hellinger_expanded_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/hellinger_expanded_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/inner_product_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/inner_product_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu
 delete mode 100644 cpp/src/distance/specializations/detail/kl_divergence_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/kl_divergence_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l1_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l1_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l2_expanded_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l2_expanded_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l2_unexpanded_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l2_unexpanded_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l_inf_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/l_inf_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/lp_unexpanded_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/lp_unexpanded_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/russel_rao_double_double_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/detail/russel_rao_float_float_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/fused_l2_nn_double_int.cu
 delete mode 100644 cpp/src/distance/specializations/fused_l2_nn_double_int64.cu
 delete mode 100644 cpp/src/distance/specializations/fused_l2_nn_float_int.cu
 delete mode 100644 cpp/src/distance/specializations/fused_l2_nn_float_int64.cu
 create mode 100644 cpp/src/linalg/detail/coalesced_reduction.cu
 create mode 100644 cpp/src/matrix/detail/select_k_double_int64_t.cu
 create mode 100644 cpp/src/matrix/detail/select_k_double_uint32_t.cu
 create mode 100644 cpp/src/matrix/detail/select_k_float_int64_t.cu
 create mode 100644 cpp/src/matrix/detail/select_k_float_uint32_t.cu
 create mode 100644 cpp/src/matrix/detail/select_k_half_int64_t.cu
 create mode 100644 cpp/src/matrix/detail/select_k_half_uint32_t.cu
 delete mode 100644 cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu
 delete mode 100644 cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu
 delete mode 100644 cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu
 delete mode 100644 cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu
 create mode 100644 cpp/src/neighbors/ball_cover.cu
 create mode 100644 cpp/src/neighbors/brute_force_00_generate.py
 create mode 100644 cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
 create mode 100644 cpp/src/neighbors/brute_force_knn_int_float_int.cu
 create mode 100644 cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_flat_search.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
 create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_00_generate.py
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_int32_t_float.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_int_double.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_long_float.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_size_t_double.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_size_t_float.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_uint32_t_float.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_00_generate.py
 create mode 100644 cpp/src/neighbors/ivf_flat_build_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_search_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivfpq_build_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivfpq_extend_float_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
 create mode 100644 cpp/src/neighbors/refine_00_generate.py
 create mode 100644 cpp/src/neighbors/refine_float_float.cu
 create mode 100644 cpp/src/neighbors/refine_int8_t_float.cu
 create mode 100644 cpp/src/neighbors/refine_uint8_t_float.cu
 delete mode 100644 cpp/src/neighbors/specializations/ball_cover_all_knn_query.cu
 delete mode 100644 cpp/src/neighbors/specializations/ball_cover_build_index.cu
 delete mode 100644 cpp/src/neighbors/specializations/ball_cover_knn_query.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_int.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_uint.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_int.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_uint.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_half_fast.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/detail/ivfpq_compute_similarity_float_half_no_smem_lut.cu
 delete mode 100644 cpp/src/neighbors/specializations/fused_l2_knn_int_float_false.cu
 delete mode 100644 cpp/src/neighbors/specializations/fused_l2_knn_int_float_true.cu
 delete mode 100644 cpp/src/neighbors/specializations/fused_l2_knn_long_float_false.cu
 delete mode 100644 cpp/src/neighbors/specializations/fused_l2_knn_long_float_true.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_build_float_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_build_int8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_build_uint8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_extend_float_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_extend_int8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_extend_uint8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_search_float_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_search_int8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfflat_search_uint8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_build_float_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_search_float_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/refine_d_int64_t_float.cu
 delete mode 100644 cpp/src/neighbors/specializations/refine_d_int64_t_int8_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/refine_d_int64_t_uint8_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/refine_h_int64_t_float.cu
 delete mode 100644 cpp/src/neighbors/specializations/refine_h_int64_t_int8_t.cu
 delete mode 100644 cpp/src/neighbors/specializations/refine_h_int64_t_uint8_t.cu
 delete mode 100644 cpp/src/nn/specializations/brute_force_knn_long_float_int.cu
 delete mode 100644 cpp/src/nn/specializations/brute_force_knn_long_float_uint.cu
 delete mode 100644 cpp/src/nn/specializations/brute_force_knn_uint32_t_float_int.cu
 delete mode 100644 cpp/src/nn/specializations/brute_force_knn_uint32_t_float_uint.cu
 rename cpp/src/{ => raft_runtime}/cluster/cluster_cost.cuh (100%)
 rename cpp/src/{ => raft_runtime}/cluster/cluster_cost_double.cu (96%)
 rename cpp/src/{ => raft_runtime}/cluster/cluster_cost_float.cu (96%)
 rename cpp/src/{ => raft_runtime}/cluster/kmeans_fit_double.cu (96%)
 rename cpp/src/{ => raft_runtime}/cluster/kmeans_fit_float.cu (96%)
 rename cpp/src/{ => raft_runtime}/cluster/kmeans_init_plus_plus_double.cu (96%)
 rename cpp/src/{ => raft_runtime}/cluster/kmeans_init_plus_plus_float.cu (96%)
 rename cpp/src/{ => raft_runtime}/cluster/update_centroids.cuh (98%)
 rename cpp/src/{ => raft_runtime}/cluster/update_centroids_double.cu (97%)
 rename cpp/src/{ => raft_runtime}/cluster/update_centroids_float.cu (97%)
 rename cpp/src/{ => raft_runtime}/distance/fused_l2_min_arg.cu (97%)
 rename cpp/src/{ => raft_runtime}/distance/pairwise_distance.cu (97%)
 rename cpp/src/{ => raft_runtime}/matrix/select_k_float_int64_t.cu (96%)
 rename cpp/src/{ => raft_runtime}/neighbors/brute_force_knn_int64_t_float.cu (97%)
 rename cpp/src/{ => raft_runtime}/neighbors/ivf_flat_build.cu (98%)
 rename cpp/src/{ => raft_runtime}/neighbors/ivf_flat_search.cu (97%)
 rename cpp/src/{ => raft_runtime}/neighbors/ivfpq_build.cu (98%)
 rename cpp/src/{ => raft_runtime}/neighbors/ivfpq_deserialize.cu (95%)
 create mode 100644 cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
 create mode 100644 cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
 create mode 100644 cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
 rename cpp/src/{ => raft_runtime}/neighbors/ivfpq_serialize.cu (95%)
 rename cpp/src/{ => raft_runtime}/neighbors/refine_d_int64_t_float.cu (96%)
 rename cpp/src/{ => raft_runtime}/neighbors/refine_d_int64_t_int8_t.cu (96%)
 rename cpp/src/{ => raft_runtime}/neighbors/refine_d_int64_t_uint8_t.cu (96%)
 rename cpp/src/{ => raft_runtime}/neighbors/refine_h_int64_t_float.cu (96%)
 rename cpp/src/{ => raft_runtime}/neighbors/refine_h_int64_t_int8_t.cu (96%)
 rename cpp/src/{ => raft_runtime}/neighbors/refine_h_int64_t_uint8_t.cu (96%)
 rename cpp/src/{ => raft_runtime}/random/common.cuh (100%)
 rename cpp/src/{ => raft_runtime}/random/rmat_rectangular_generator_int64_double.cu (100%)
 rename cpp/src/{ => raft_runtime}/random/rmat_rectangular_generator_int64_float.cu (100%)
 rename cpp/src/{ => raft_runtime}/random/rmat_rectangular_generator_int_double.cu (100%)
 rename cpp/src/{ => raft_runtime}/random/rmat_rectangular_generator_int_float.cu (100%)
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
 create mode 100644 cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
 create mode 100644 cpp/src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu
 create mode 100644 cpp/src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
 create mode 100644 cpp/src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
 rename cpp/src/{distance/specializations/detail/kernels/rbf_kernel_double.cu => util/memory_pool.cpp} (72%)
 create mode 100644 cpp/test/distance/dist_adj.cuh
 create mode 100644 cpp/test/distance/dist_adj_distance_instance.cu
 create mode 100644 cpp/test/distance/dist_adj_threshold.cuh
 create mode 100644 cpp/test/ext_headers/00_generate.py
 rename cpp/{src/distance/specializations/detail/kernels/tanh_kernel_float.cu => test/ext_headers/raft_core_logger.cpp} (72%)
 create mode 100644 cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu
 create mode 100644 cpp/test/ext_headers/raft_distance_distance.cu
 create mode 100644 cpp/test/ext_headers/raft_distance_fused_l2_nn.cu
 create mode 100644 cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu
 create mode 100644 cpp/test/ext_headers/raft_matrix_detail_select_k.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_ball_cover.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_brute_force.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_detail_selection_faiss.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_ivf_flat.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_ivf_pq.cu
 create mode 100644 cpp/test/ext_headers/raft_neighbors_refine.cu
 create mode 100644 cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu
 rename cpp/{src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu => test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu} (70%)
 create mode 100644 cpp/test/ext_headers/raft_util_memory_pool.cpp

diff --git a/README.md b/README.md
index 8de4c058dc..10cd7b16fc 100755
--- a/README.md
+++ b/README.md
@@ -203,7 +203,7 @@ RAFT itself can be installed through conda, [CMake Package Manager (CPM)](https:
 
 The easiest way to install RAFT is through conda and several packages are provided.
 - `libraft-headers` RAFT headers
-- `libraft` (optional) shared library of pre-compiled template specializations and runtime APIs.
+- `libraft` (optional) shared library of pre-compiled template instantiations and runtime APIs.
 - `pylibraft` (optional) Python wrappers around RAFT algorithms and primitives.
 - `raft-dask` (optional) enables deployment of multi-node multi-GPU algorithms that use RAFT `raft::comms` in Dask clusters.
 
@@ -236,11 +236,11 @@ You can find an [example RAFT](cpp/template/README.md) project template in the `
 
 Additional CMake targets can be made available by adding components in the table below to the `RAFT_COMPONENTS` list above, separated by spaces. The `raft::raft` target will always be available. RAFT headers require, at a minimum, the CUDA toolkit libraries and RMM dependencies.
 
-| Component   | Target              | Description                                               | Base Dependencies                     |
-|-------------|---------------------|-----------------------------------------------------------|---------------------------------------|
-| n/a         | `raft::raft`        | Full RAFT header library                                  | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS |
-| compiled    | `raft::compiled`    | Pre-compiled template specializations and runtime library | raft::raft                            |
-| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs                       | raft::raft, UCX, NCCL                 |
+| Component   | Target              | Description                                              | Base Dependencies                      |
+|-------------|---------------------|----------------------------------------------------------|----------------------------------------|
+| n/a         | `raft::raft`        | Full RAFT header library                                 | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS |
+| compiled    | `raft::compiled`    | Pre-compiled template instantiations and runtime library | raft::raft                             |
+| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs                      | raft::raft, UCX, NCCL                  |
 
 ### Source
 
@@ -287,7 +287,7 @@ The folder structure mirrors other RAPIDS repos, with the following folders:
     - `util`: Various reusable tools and utilities for accelerated algorithm development
   - `internal`: A private header-only component that hosts the code shared between benchmarks and tests.
   - `scripts`: Helpful scripts for development
-  - `src`: Compiled APIs and template specializations for the shared libraries
+  - `src`: Compiled APIs and template instantiations for the shared libraries
   - `template`: A skeleton template containing the bare-bones file structure and cmake configuration for writing applications with RAFT.
   - `test`: Googletests source code
 - `docs`: Source code and scripts for building library documentation (Uses breath, doxygen, & pydocs)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 62f9ac604e..cddfa4b38d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -263,181 +263,135 @@ set_target_properties(raft_compiled PROPERTIES EXPORT_NAME compiled)
 if(RAFT_COMPILE_LIBRARY)
   add_library(
     raft_lib
-    src/distance/pairwise_distance.cu
-    src/distance/fused_l2_min_arg.cu
-    src/cluster/update_centroids_float.cu
-    src/cluster/update_centroids_double.cu
-    src/cluster/cluster_cost_float.cu
-    src/cluster/cluster_cost_double.cu
-    src/neighbors/refine_d_int64_t_float.cu
-    src/neighbors/refine_d_int64_t_int8_t.cu
-    src/neighbors/refine_d_int64_t_uint8_t.cu
-    src/neighbors/refine_h_int64_t_float.cu
-    src/neighbors/refine_h_int64_t_int8_t.cu
-    src/neighbors/refine_h_int64_t_uint8_t.cu
-    src/neighbors/specializations/refine_d_int64_t_float.cu
-    src/neighbors/specializations/refine_d_int64_t_int8_t.cu
-    src/neighbors/specializations/refine_d_int64_t_uint8_t.cu
-    src/neighbors/specializations/refine_h_int64_t_float.cu
-    src/neighbors/specializations/refine_h_int64_t_int8_t.cu
-    src/neighbors/specializations/refine_h_int64_t_uint8_t.cu
-    src/cluster/kmeans_fit_float.cu
-    src/cluster/kmeans_fit_double.cu
-    src/cluster/kmeans_init_plus_plus_double.cu
-    src/cluster/kmeans_init_plus_plus_float.cu
-    src/distance/specializations/detail/canberra_double_double_double_int.cu
-    src/distance/specializations/detail/canberra_float_float_float_int.cu
-    src/distance/specializations/detail/correlation_double_double_double_int.cu
-    src/distance/specializations/detail/correlation_float_float_float_int.cu
-    src/distance/specializations/detail/cosine_double_double_double_int.cu
-    src/distance/specializations/detail/cosine_float_float_float_int.cu
-    src/distance/specializations/detail/hamming_unexpanded_double_double_double_int.cu
-    src/distance/specializations/detail/hamming_unexpanded_float_float_float_int.cu
-    src/distance/specializations/detail/hellinger_expanded_float_float_float_int.cu
-    src/distance/specializations/detail/hellinger_expanded_double_double_double_int.cu
-    src/distance/specializations/detail/inner_product_float_float_float_int.cu
-    src/distance/specializations/detail/inner_product_double_double_double_int.cu
-    src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu
-    src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu
-    src/distance/specializations/detail/kernels/gram_matrix_base_double.cu
-    src/distance/specializations/detail/kernels/gram_matrix_base_float.cu
-    src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu
-    src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu
-    # These are somehow missing a kernel definition which is causing a compile error.
-    # src/distance/specializations/detail/kernels/rbf_kernel_double.cu
-    # src/distance/specializations/detail/kernels/rbf_kernel_float.cu
-    src/neighbors/brute_force_knn_int64_t_float.cu
-    src/distance/specializations/detail/kernels/tanh_kernel_double.cu
-    src/distance/specializations/detail/kernels/tanh_kernel_float.cu
-    src/distance/specializations/detail/kl_divergence_float_float_float_int.cu
-    src/distance/specializations/detail/kl_divergence_double_double_double_int.cu
-    src/distance/specializations/detail/l1_float_float_float_int.cu
-    src/distance/specializations/detail/l1_double_double_double_int.cu
-    src/distance/specializations/detail/l2_expanded_float_float_float_int.cu
-    src/distance/specializations/detail/l2_expanded_double_double_double_int.cu
-    src/distance/specializations/detail/l2_unexpanded_double_double_double_int.cu
-    src/distance/specializations/detail/l2_unexpanded_float_float_float_int.cu
-    src/distance/specializations/detail/l_inf_double_double_double_int.cu
-    src/distance/specializations/detail/l_inf_float_float_float_int.cu
-    src/distance/specializations/detail/lp_unexpanded_double_double_double_int.cu
-    src/distance/specializations/detail/lp_unexpanded_float_float_float_int.cu
-    src/distance/specializations/detail/russel_rao_double_double_double_int.cu
-    src/distance/specializations/detail/russel_rao_float_float_float_int.cu
-    src/distance/specializations/fused_l2_nn_double_int.cu
-    src/distance/specializations/fused_l2_nn_double_int64.cu
-    src/distance/specializations/fused_l2_nn_float_int.cu
-    src/distance/specializations/fused_l2_nn_float_int64.cu
-    src/matrix/select_k_float_int64_t.cu
-    src/matrix/specializations/detail/select_k_float_uint32_t.cu
-    src/matrix/specializations/detail/select_k_float_int64_t.cu
-    src/matrix/specializations/detail/select_k_half_uint32_t.cu
-    src/matrix/specializations/detail/select_k_half_int64_t.cu
-    src/neighbors/ivfpq_build.cu
-    src/neighbors/ivfpq_deserialize.cu
-    src/neighbors/ivfpq_serialize.cu
+    src/core/logger.cpp
+    src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_rbf.cu
+    src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
+    src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
+    src/distance/distance.cu
+    src/distance/fused_l2_nn.cu
+    src/linalg/detail/coalesced_reduction.cu
+    src/matrix/detail/select_k_double_int64_t.cu
+    src/matrix/detail/select_k_double_uint32_t.cu
+    src/matrix/detail/select_k_float_int64_t.cu
+    src/matrix/detail/select_k_float_uint32_t.cu
+    src/matrix/detail/select_k_half_int64_t.cu
+    src/matrix/detail/select_k_half_uint32_t.cu
+    src/neighbors/ball_cover.cu
+    src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
+    src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
+    src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
+    src/neighbors/brute_force_knn_int_float_int.cu
+    src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
+    src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
+    src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
+    src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
+    src/neighbors/detail/ivf_flat_search.cu
+    src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
+    src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
+    src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
+    src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
+    src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
+    src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
+    src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
+    src/neighbors/detail/selection_faiss_int32_t_float.cu
+    src/neighbors/detail/selection_faiss_int_double.cu
+    src/neighbors/detail/selection_faiss_long_float.cu
+    src/neighbors/detail/selection_faiss_size_t_double.cu
+    src/neighbors/detail/selection_faiss_size_t_float.cu
+    src/neighbors/detail/selection_faiss_uint32_t_float.cu
+    src/neighbors/ivf_flat_build_float_int64_t.cu
+    src/neighbors/ivf_flat_build_int8_t_int64_t.cu
+    src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
+    src/neighbors/ivf_flat_extend_float_int64_t.cu
+    src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
+    src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
+    src/neighbors/ivf_flat_search_float_int64_t.cu
+    src/neighbors/ivf_flat_search_int8_t_int64_t.cu
+    src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
+    src/neighbors/ivfpq_build_float_int64_t.cu
+    src/neighbors/ivfpq_build_int8_t_int64_t.cu
+    src/neighbors/ivfpq_build_uint8_t_int64_t.cu
+    src/neighbors/ivfpq_extend_float_int64_t.cu
+    src/neighbors/ivfpq_extend_int8_t_int64_t.cu
+    src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
     src/neighbors/ivfpq_search_float_int64_t.cu
     src/neighbors/ivfpq_search_int8_t_int64_t.cu
     src/neighbors/ivfpq_search_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_build_float_int64_t.cu
-    src/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
-    src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_search_float_int64_t.cu
-    src/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu
-    src/neighbors/specializations/detail/brute_force_knn_impl_long_float_int.cu
-    src/neighbors/specializations/detail/brute_force_knn_impl_long_float_uint.cu
-    src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_int.cu
-    src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_uint.cu
-    src/neighbors/specializations/detail/compute_similarity_float_float_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_float_half_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu
-    src/random/rmat_rectangular_generator_int_double.cu
-    src/random/rmat_rectangular_generator_int64_double.cu
-    src/random/rmat_rectangular_generator_int_float.cu
-    src/random/rmat_rectangular_generator_int64_float.cu
-    src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
-    src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
-    src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
-    src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
-    src/neighbors/specializations/ball_cover_all_knn_query.cu
-    src/neighbors/specializations/ball_cover_build_index.cu
-    src/neighbors/specializations/ball_cover_knn_query.cu
-    src/neighbors/specializations/fused_l2_knn_long_float_true.cu
-    src/neighbors/specializations/fused_l2_knn_long_float_false.cu
-    src/neighbors/specializations/fused_l2_knn_int_float_true.cu
-    src/neighbors/specializations/fused_l2_knn_int_float_false.cu
-    src/neighbors/ivf_flat_search.cu
-    src/neighbors/ivf_flat_build.cu
-    src/neighbors/specializations/ivfflat_build_float_int64_t.cu
-    src/neighbors/specializations/ivfflat_build_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfflat_build_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfflat_extend_float_int64_t.cu
-    src/neighbors/specializations/ivfflat_extend_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfflat_extend_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfflat_search_float_int64_t.cu
-    src/neighbors/specializations/ivfflat_search_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfflat_search_uint8_t_int64_t.cu
-    src/neighbors/ivfpq_build.cu
-    src/neighbors/ivfpq_deserialize.cu
-    src/neighbors/ivfpq_serialize.cu
-    src/neighbors/ivfpq_search_float_int64_t.cu
-    src/neighbors/ivfpq_search_int8_t_int64_t.cu
-    src/neighbors/ivfpq_search_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_build_float_int64_t.cu
-    src/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
-    src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_search_float_int64_t.cu
-    src/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu
-    src/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu
-    src/neighbors/specializations/detail/compute_similarity_float_float_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_float_half_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu
-    src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu
-    src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu
-    src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu
-    src/random/rmat_rectangular_generator_int_double.cu
-    src/random/rmat_rectangular_generator_int64_double.cu
-    src/random/rmat_rectangular_generator_int_float.cu
-    src/random/rmat_rectangular_generator_int64_float.cu
+    src/neighbors/refine_float_float.cu
+    src/neighbors/refine_int8_t_float.cu
+    src/neighbors/refine_uint8_t_float.cu
+    src/raft_runtime/cluster/cluster_cost.cuh
+    src/raft_runtime/cluster/cluster_cost_double.cu
+    src/raft_runtime/cluster/cluster_cost_float.cu
+    src/raft_runtime/cluster/kmeans_fit_double.cu
+    src/raft_runtime/cluster/kmeans_fit_float.cu
+    src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
+    src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
+    src/raft_runtime/cluster/update_centroids.cuh
+    src/raft_runtime/cluster/update_centroids_double.cu
+    src/raft_runtime/cluster/update_centroids_float.cu
+    src/raft_runtime/distance/fused_l2_min_arg.cu
+    src/raft_runtime/distance/pairwise_distance.cu
+    src/raft_runtime/matrix/select_k_float_int64_t.cu
+    src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
+    src/raft_runtime/neighbors/ivf_flat_build.cu
+    src/raft_runtime/neighbors/ivf_flat_search.cu
+    src/raft_runtime/neighbors/ivfpq_build.cu
+    src/raft_runtime/neighbors/ivfpq_deserialize.cu
+    src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
+    src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
+    src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
+    src/raft_runtime/neighbors/ivfpq_serialize.cu
+    src/raft_runtime/neighbors/refine_d_int64_t_float.cu
+    src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
+    src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
+    src/raft_runtime/neighbors/refine_h_int64_t_float.cu
+    src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
+    src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
+    src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu
+    src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
+    src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
+    src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
+    src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
+    src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu
+    src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
+    src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
+    src/util/memory_pool.cpp
   )
   set_target_properties(
     raft_lib
@@ -463,7 +417,13 @@ if(RAFT_COMPILE_LIBRARY)
     raft_lib PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAFT_CXX_FLAGS}>"
                      "$<$<COMPILE_LANGUAGE:CUDA>:${RAFT_CUDA_FLAGS}>"
   )
-  target_compile_definitions(raft_lib INTERFACE "RAFT_COMPILED")
+
+  # RAFT_COMPILED is set during compilation of libraft.so as well as downstream libraries (due to
+  # "PUBLIC")
+  target_compile_definitions(raft_lib PUBLIC "RAFT_COMPILED")
+
+  # RAFT_EXPLICIT_INSTANTIATE_ONLY is set during compilation of libraft.so (due to "PRIVATE")
+  target_compile_definitions(raft_lib PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY")
 
   # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
   target_link_options(raft_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu
index d8e98ce2a9..baff1b1c45 100644
--- a/cpp/bench/ann/src/raft/raft_benchmark.cu
+++ b/cpp/bench/ann/src/raft/raft_benchmark.cu
@@ -22,10 +22,6 @@
 #include <type_traits>
 #include <utility>
 
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include "../common/ann_types.hpp"
 #include "../common/benchmark_util.hpp"
 #undef WARP_SIZE
diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat.cu b/cpp/bench/ann/src/raft/raft_ivf_flat.cu
index ff108080b5..bcd23723a4 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_flat.cu
+++ b/cpp/bench/ann/src/raft/raft_ivf_flat.cu
@@ -15,12 +15,8 @@
  */
 #include "raft_ivf_flat_wrapper.h"
 
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 namespace raft::bench::ann {
 template class RaftIvfFlatGpu<float, int64_t>;
 template class RaftIvfFlatGpu<uint8_t, int64_t>;
 template class RaftIvfFlatGpu<int8_t, int64_t>;
-}  // namespace raft::bench::ann
\ No newline at end of file
+}  // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
index 8b2a7d329b..0a80eef1b5 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
@@ -29,6 +29,7 @@
 #include <raft/neighbors/ivf_flat_types.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
 #include <stdexcept>
 #include <string>
 #include <type_traits>
diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq.cu b/cpp/bench/ann/src/raft/raft_ivf_pq.cu
index 338bc9a32f..2efe14631b 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_pq.cu
+++ b/cpp/bench/ann/src/raft/raft_ivf_pq.cu
@@ -15,10 +15,6 @@
  */
 #include "raft_ivf_pq_wrapper.h"
 
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 namespace raft::bench::ann {
 template class RaftIvfPQ<float, int64_t>;
 template class RaftIvfPQ<uint8_t, int64_t>;
diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt
index f6499623dd..505ca32886 100644
--- a/cpp/bench/prims/CMakeLists.txt
+++ b/cpp/bench/prims/CMakeLists.txt
@@ -17,7 +17,7 @@
 
 function(ConfigureBench)
 
-  set(options OPTIONAL LIB)
+  set(options OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY)
   set(oneValueArgs NAME)
   set(multiValueArgs PATH TARGETS CONFIGURATIONS)
 
@@ -55,6 +55,10 @@ function(ConfigureBench)
                           "$<$<COMPILE_LANGUAGE:CUDA>:${RAFT_CUDA_FLAGS}>"
   )
 
+  if(ConfigureBench_EXPLICIT_INSTANTIATE_ONLY) # flag from this function's own options list
+    target_compile_definitions(${BENCH_NAME} PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY")
+  endif()
+
   target_include_directories(
     ${BENCH_NAME} PUBLIC "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/bench/prims>"
   )
@@ -71,7 +75,7 @@ endfunction()
 if(BUILD_PRIMS_BENCH)
   ConfigureBench(
     NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu
-    bench/prims/main.cpp OPTIONAL LIB
+    bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureBench(
@@ -93,6 +97,7 @@ if(BUILD_PRIMS_BENCH)
     bench/prims/main.cpp
     OPTIONAL
     LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureBench(
@@ -112,7 +117,7 @@ if(BUILD_PRIMS_BENCH)
 
   ConfigureBench(
     NAME MATRIX_BENCH PATH bench/prims/matrix/argmin.cu bench/prims/matrix/gather.cu
-    bench/prims/matrix/select_k.cu bench/prims/main.cpp OPTIONAL LIB
+    bench/prims/matrix/select_k.cu bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureBench(
@@ -139,5 +144,6 @@ if(BUILD_PRIMS_BENCH)
     bench/prims/main.cpp
     OPTIONAL
     LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 endif()
diff --git a/cpp/bench/prims/cluster/kmeans.cu b/cpp/bench/prims/cluster/kmeans.cu
index af7afb8037..3147960f72 100644
--- a/cpp/bench/prims/cluster/kmeans.cu
+++ b/cpp/bench/prims/cluster/kmeans.cu
@@ -18,10 +18,6 @@
 #include <raft/cluster/kmeans.cuh>
 #include <raft/cluster/kmeans_types.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/cluster/specializations.cuh>
-#endif
-
 namespace raft::bench::cluster {
 
 struct KMeansBenchParams {
diff --git a/cpp/bench/prims/cluster/kmeans_balanced.cu b/cpp/bench/prims/cluster/kmeans_balanced.cu
index 6bda43bdb2..42a8f7967c 100644
--- a/cpp/bench/prims/cluster/kmeans_balanced.cu
+++ b/cpp/bench/prims/cluster/kmeans_balanced.cu
@@ -18,10 +18,6 @@
 #include <raft/cluster/kmeans_balanced.cuh>
 #include <raft/random/rng.cuh>
 
-#if defined RAFT_COMPILED
-#include <raft/cluster/specializations.cuh>
-#endif
-
 namespace raft::bench::cluster {
 
 struct KMeansBalancedBenchParams {
diff --git a/cpp/bench/prims/distance/distance_common.cuh b/cpp/bench/prims/distance/distance_common.cuh
index 9b5d67a46f..dff3401b62 100644
--- a/cpp/bench/prims/distance/distance_common.cuh
+++ b/cpp/bench/prims/distance/distance_common.cuh
@@ -17,9 +17,6 @@
 #include <common/benchmark.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/util/cudart_utils.hpp>
-#if defined RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
 #include <rmm/device_uvector.hpp>
 
 namespace raft::bench::distance {
diff --git a/cpp/bench/prims/distance/fused_l2_nn.cu b/cpp/bench/prims/distance/fused_l2_nn.cu
index a5115407dd..24c0cbf8f9 100644
--- a/cpp/bench/prims/distance/fused_l2_nn.cu
+++ b/cpp/bench/prims/distance/fused_l2_nn.cu
@@ -18,9 +18,6 @@
 #include <raft/distance/fused_l2_nn.cuh>
 #include <raft/linalg/norm.cuh>
 #include <raft/util/cudart_utils.hpp>
-#if defined RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
 #include <rmm/device_uvector.hpp>
 
 namespace raft::bench::distance {
diff --git a/cpp/bench/prims/distance/kernels.cu b/cpp/bench/prims/distance/kernels.cu
index 4407bdcf83..53d97c1fc7 100644
--- a/cpp/bench/prims/distance/kernels.cu
+++ b/cpp/bench/prims/distance/kernels.cu
@@ -13,10 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#if defined RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
-
 #include <common/benchmark.hpp>
 #include <memory>
 #include <raft/core/device_resources.hpp>
diff --git a/cpp/bench/prims/distance/masked_nn.cu b/cpp/bench/prims/distance/masked_nn.cu
index f9f234187d..c804ecb3a1 100644
--- a/cpp/bench/prims/distance/masked_nn.cu
+++ b/cpp/bench/prims/distance/masked_nn.cu
@@ -30,10 +30,6 @@
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
 
-#ifdef RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
-
 namespace raft::bench::distance::masked_nn {
 
 // Introduce various sparsity patterns
diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index 1ff584ca58..8e75280029 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -23,10 +23,6 @@
 #include <raft/sparse/detail/utils.h>
 #include <raft/util/cudart_utils.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/matrix/specializations.cuh>
-#endif
-
 #include <raft/matrix/detail/select_radix.cuh>
 #include <raft/matrix/detail/select_warpsort.cuh>
 #include <raft/matrix/select_k.cuh>
diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
index 5431b9492e..afb3bf9da3 100644
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ b/cpp/bench/prims/neighbors/knn.cuh
@@ -24,10 +24,6 @@
 #include <raft/neighbors/ivf_pq.cuh>
 #include <raft/spatial/knn/knn.cuh>
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include <rmm/mr/device/managed_memory_resource.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
 
diff --git a/cpp/bench/prims/neighbors/refine_float_int64_t.cu b/cpp/bench/prims/neighbors/refine_float_int64_t.cu
index 43be330e9b..bbedc1ae64 100644
--- a/cpp/bench/prims/neighbors/refine_float_int64_t.cu
+++ b/cpp/bench/prims/neighbors/refine_float_int64_t.cu
@@ -17,11 +17,6 @@
 #include "refine.cuh"
 #include <common/benchmark.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations/refine.cuh>
-#include <raft/spatial/knn/specializations.cuh>
-#endif
-
 using namespace raft::neighbors;
 
 namespace raft::bench::neighbors {
diff --git a/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu b/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu
index 1d7cb8c8aa..4952361f03 100644
--- a/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu
+++ b/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu
@@ -17,10 +17,6 @@
 #include "refine.cuh"
 #include <common/benchmark.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 using namespace raft::neighbors;
 
 namespace raft::bench::neighbors {
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index 17a1e0caca..1948169c91 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -918,6 +918,7 @@ EXCLUDE_SYMLINKS       = NO
 # Note that the wildcards are matched against the file with absolute path, so to
 # exclude all test directories for example use the pattern */test/*
 
+# TODO: remove specializations from exclude patterns when headers have been removed.
 EXCLUDE_PATTERNS       = */detail/* \
                          */specializations/* \
                          */thirdparty/*
diff --git a/cpp/include/raft/cluster/detail/kmeans_common.cuh b/cpp/include/raft/cluster/detail/kmeans_common.cuh
index 76fc22e99e..cca1cbb6e9 100644
--- a/cpp/include/raft/cluster/detail/kmeans_common.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_common.cuh
@@ -38,6 +38,7 @@
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
+#include <raft/linalg/norm.cuh>
 #include <raft/linalg/reduce_rows_by_key.cuh>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/matrix/gather.cuh>
diff --git a/cpp/include/raft/cluster/specializations.cuh b/cpp/include/raft/cluster/specializations.cuh
index 9b68d7adc9..9588a7f329 100644
--- a/cpp/include/raft/cluster/specializations.cuh
+++ b/cpp/include/raft/cluster/specializations.cuh
@@ -13,12 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef __CLUSTER_SPECIALIZATIONS_H
-#define __CLUSTER_SPECIALIZATIONS_H
-
 #pragma once
 
-#include <raft/distance/specializations.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#endif
\ No newline at end of file
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/core/detail/macros.hpp b/cpp/include/raft/core/detail/macros.hpp
index bfb47437ad..390acea697 100644
--- a/cpp/include/raft/core/detail/macros.hpp
+++ b/cpp/include/raft/core/detail/macros.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,6 +40,40 @@
 #define RAFT_INLINE_FUNCTION _RAFT_HOST_DEVICE _RAFT_FORCEINLINE
 #endif
 
+// The RAFT_INLINE_CONDITIONAL is a conditional inline specifier that removes
+// the inline specification when RAFT_COMPILED is defined.
+//
+// When RAFT_COMPILED is not defined, functions may be defined in multiple
+// translation units and we do not want that to lead to linker errors.
+//
+// When RAFT_COMPILED is defined, this serves two purposes:
+//
+// 1. It triggers a multiple definition error message when memory_pool-inl.hpp
+// (for instance) is accidentally included in multiple translation units.
+//
+// 2. We want function definitions to be non-inline, because non-inline function
+// symbols are always exported in the object symbol table. For inline functions,
+// the compiler may elide the external symbol, which results in linker errors.
+#ifdef RAFT_COMPILED
+#define RAFT_INLINE_CONDITIONAL
+#else
+#define RAFT_INLINE_CONDITIONAL inline
+#endif  // RAFT_COMPILED
+
+// The RAFT_WEAK_FUNCTION macro specifies that:
+//
+// 1. A function may be defined in multiple translation units (like inline)
+//
+// 2. Must still emit an external symbol (unlike inline). This enables declaring
+// a function signature in an `-ext` header and defining it in a source file.
+//
+// From
+// https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes:
+//
+// "The weak attribute causes a declaration of an external symbol to be emitted
+// as a weak symbol rather than a global."
+#define RAFT_WEAK_FUNCTION __attribute__((weak))
+
 /**
  * Some macro magic to remove optional parentheses of a macro argument.
  * See https://stackoverflow.com/a/62984543
diff --git a/cpp/include/raft/core/logger-ext.hpp b/cpp/include/raft/core/logger-ext.hpp
new file mode 100644
index 0000000000..69688560c7
--- /dev/null
+++ b/cpp/include/raft/core/logger-ext.hpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <memory>         // std::unique_ptr
+#include <string>         // std::string
+#include <unordered_map>  // std::unordered_map
+
+namespace raft {
+
+static const std::string RAFT_NAME = "raft";
+static const std::string default_log_pattern("[%L] [%H:%M:%S.%f] %v");
+
+/**
+ * @brief The main Logging class for raft library.
+ *
+ * This class acts as a thin wrapper over the underlying `spdlog` interface. The
+ * design is done in this way in order to avoid us having to also ship `spdlog`
+ * header files in our installation.
+ *
+ * @todo This currently only supports logging to stdout. Need to add support in
+ *       future to add custom loggers as well [Issue #2046]
+ */
+class logger {
+ public:
+  // @todo setting the logger once per process with
+  logger(std::string const& name_ = "");
+  /**
+   * @brief Singleton method to get the underlying logger object
+   *
+   * @return the singleton logger object
+   */
+  static logger& get(std::string const& name = "");
+
+  /**
+   * @brief Set the logging level.
+   *
+   * Only messages with severity equal to or above this level will be printed
+   *
+   * @param[in] level logging level
+   *
+   * @note Inputs outside the range [RAFT_LEVEL_OFF, RAFT_LEVEL_TRACE] are not
+   *       rejected: they are clamped to the nearest bound of that range before
+   *       being applied. See documentation of decisiontree for how this gets used
+   */
+  void set_level(int level);
+
+  /**
+   * @brief Set the logging pattern
+   *
+   * @param[in] pattern the pattern to be set. Refer to this link
+   *                    https://github.com/gabime/spdlog/wiki/3.-Custom-formatting
+   *                    to know the right syntax of this pattern
+   */
+  void set_pattern(const std::string& pattern);
+
+  /**
+   * @brief Register a callback function to be run in place of usual log call
+   *
+   * @param[in] callback the function to be run on all logged messages
+   */
+  void set_callback(void (*callback)(int lvl, const char* msg));
+
+  /**
+   * @brief Register a flush function compatible with the registered callback
+   *
+   * @param[in] flush the function to use when flushing logs
+   */
+  void set_flush(void (*flush)());
+
+  /**
+   * @brief Tells whether messages will be logged for the given log level
+   *
+   * @param[in] level log level to be checked for
+   * @return true if messages will be logged for this level, else false
+   */
+  bool should_log_for(int level) const;
+  /**
+   * @brief Query for the current log level
+   *
+   * @return the current log level
+   */
+  int get_level() const;
+
+  /**
+   * @brief Get the current logging pattern
+   * @return the pattern
+   */
+  std::string get_pattern() const;
+
+  /**
+   * @brief Main logging method
+   *
+   * @param[in] level logging level of this message
+   * @param[in] fmt   C-like format string, followed by respective params
+   */
+  void log(int level, const char* fmt, ...);
+
+  /**
+   * @brief Flush logs by calling flush on underlying logger
+   */
+  void flush();
+
+  ~logger();
+
+ private:
+  logger();
+  // pimpl pattern:
+  // https://learn.microsoft.com/en-us/cpp/cpp/pimpl-for-compile-time-encapsulation-modern-cpp?view=msvc-170
+  class impl;
+  std::unique_ptr<impl> pimpl;
+  static inline std::unordered_map<std::string, std::shared_ptr<raft::logger>> log_map;
+};  // class logger
+
+};  // namespace raft
diff --git a/cpp/include/raft/core/logger-inl.hpp b/cpp/include/raft/core/logger-inl.hpp
new file mode 100644
index 0000000000..a90023d01f
--- /dev/null
+++ b/cpp/include/raft/core/logger-inl.hpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <stdarg.h>
+
+#include <algorithm>
+
+#include <memory>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+
+#include <stdarg.h>
+
+#include "logger-macros.hpp"
+// The logger-ext.hpp file contains the class declaration of the logger class.
+// In this case, it is okay to include the logger-ext.hpp file because it
+// contains no RAFT_EXPLICIT template instantiations.
+#include "logger-ext.hpp"
+
+#define SPDLOG_HEADER_ONLY
+#include <raft/core/detail/callback_sink.hpp>
+#include <raft/core/detail/macros.hpp>        // RAFT_INLINE_CONDITIONAL
+#include <spdlog/sinks/stdout_color_sinks.h>  // NOLINT
+#include <spdlog/spdlog.h>                    // NOLINT
+
+namespace raft {
+
+namespace detail {
+// Expand the printf-style `fmt` with the arguments in `vl` into a std::string ("" on error).
+inline std::string format(const char* fmt, va_list& vl)
+{
+  va_list vl_copy;
+  va_copy(vl_copy, vl);  // the sizing pass below consumes a va_list, so give it its own copy
+  int length = std::vsnprintf(nullptr, 0, fmt, vl_copy);  // character count, excluding the NUL
+  va_end(vl_copy);                                        // every va_copy needs a matching va_end
+  std::vector<char> buf(length < 0 ? 1 : length + 1);     // zero-filled; 1 byte on encoding error
+  if (length > 0) { std::vsnprintf(buf.data(), buf.size(), fmt, vl); }
+  return std::string(buf.data());
+}
+
+inline std::string format(const char* fmt, ...)
+{
+  va_list vl;
+  va_start(vl, fmt);
+  std::string str = format(fmt, vl);
+  va_end(vl);
+  return str;
+}
+// Map a RAFT log level onto the reversed numeric scale used for the spdlog level enum.
+inline int convert_level_to_spdlog(int level)
+{
+  level = std::max(RAFT_LEVEL_OFF, std::min(RAFT_LEVEL_TRACE, level));  // clamp to [OFF, TRACE]
+  return RAFT_LEVEL_TRACE - level;  // flip the scale: RAFT_LEVEL_TRACE -> 0, RAFT_LEVEL_OFF -> 6
+}
+
+}  // namespace detail
+
+class logger::impl {  // defined privately here
+                      // ... all private data and functions: all of these
+                      //     can now change without recompiling callers ...
+ public:
+  std::shared_ptr<spdlog::sinks::callback_sink_mt> sink;
+  std::shared_ptr<spdlog::logger> spdlogger;
+  std::string cur_pattern;
+  int cur_level;
+
+  impl(std::string const& name_ = "")
+    : sink{std::make_shared<spdlog::sinks::callback_sink_mt>()},
+      spdlogger{std::make_shared<spdlog::logger>(name_, sink)},
+      cur_pattern()
+  {
+  }
+};  // class logger::impl
+
+RAFT_INLINE_CONDITIONAL logger::logger(std::string const& name_) : pimpl(new impl(name_))
+{
+  set_pattern(default_log_pattern);
+  set_level(RAFT_ACTIVE_LEVEL);
+}
+
+RAFT_INLINE_CONDITIONAL logger& logger::get(std::string const& name)
+{
+  if (log_map.find(name) == log_map.end()) { log_map[name] = std::make_shared<raft::logger>(name); }
+  return *log_map[name];
+}
+
+RAFT_INLINE_CONDITIONAL void logger::set_level(int level)
+{
+  level = raft::detail::convert_level_to_spdlog(level);
+  pimpl->spdlogger->set_level(static_cast<spdlog::level::level_enum>(level));
+}
+
+RAFT_INLINE_CONDITIONAL void logger::set_pattern(const std::string& pattern)
+{
+  pimpl->cur_pattern = pattern;
+  pimpl->spdlogger->set_pattern(pattern);
+}
+
+RAFT_INLINE_CONDITIONAL void logger::set_callback(void (*callback)(int lvl, const char* msg))
+{
+  pimpl->sink->set_callback(callback);
+}
+
+RAFT_INLINE_CONDITIONAL void logger::set_flush(void (*flush)()) { pimpl->sink->set_flush(flush); }
+
+RAFT_INLINE_CONDITIONAL bool logger::should_log_for(int level) const
+{
+  level        = raft::detail::convert_level_to_spdlog(level);
+  auto level_e = static_cast<spdlog::level::level_enum>(level);
+  return pimpl->spdlogger->should_log(level_e);
+}
+// Report the current level on the RAFT scale, translating back from the spdlog enum value.
+RAFT_INLINE_CONDITIONAL int logger::get_level() const
+{
+  auto level_e = pimpl->spdlogger->level();
+  return RAFT_LEVEL_TRACE - static_cast<int>(level_e);  // undo convert_level_to_spdlog's flip
+}
+
+RAFT_INLINE_CONDITIONAL std::string logger::get_pattern() const { return pimpl->cur_pattern; }
+
+RAFT_INLINE_CONDITIONAL void logger::log(int level, const char* fmt, ...)
+{
+  level        = raft::detail::convert_level_to_spdlog(level);
+  auto level_e = static_cast<spdlog::level::level_enum>(level);
+  // explicit check to make sure that we only expand messages when required
+  if (pimpl->spdlogger->should_log(level_e)) {
+    va_list vl;
+    va_start(vl, fmt);
+    auto msg = raft::detail::format(fmt, vl);
+    va_end(vl);
+    pimpl->spdlogger->log(level_e, msg);
+  }
+}
+
+RAFT_INLINE_CONDITIONAL void logger::flush() { pimpl->spdlogger->flush(); }
+
+RAFT_INLINE_CONDITIONAL logger::~logger() {}
+
+};  // namespace raft
diff --git a/cpp/include/raft/core/logger-macros.hpp b/cpp/include/raft/core/logger-macros.hpp
new file mode 100644
index 0000000000..5ddb072067
--- /dev/null
+++ b/cpp/include/raft/core/logger-macros.hpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+/**
+ * @defgroup logging levels used in raft
+ *
+ * @note exactly match the corresponding ones (but reverse in terms of value)
+ *       in spdlog for wrapping purposes
+ *
+ * @{
+ */
+#define RAFT_LEVEL_TRACE    6
+#define RAFT_LEVEL_DEBUG    5
+#define RAFT_LEVEL_INFO     4
+#define RAFT_LEVEL_WARN     3
+#define RAFT_LEVEL_ERROR    2
+#define RAFT_LEVEL_CRITICAL 1
+#define RAFT_LEVEL_OFF      0
+/** @} */
+
+#if !defined(RAFT_ACTIVE_LEVEL)
+#define RAFT_ACTIVE_LEVEL RAFT_LEVEL_INFO
+#endif
+
+/**
+ * @defgroup loggerMacros Helper macros for dealing with logging
+ * @{
+ */
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE)
+#define RAFT_LOG_TRACE(fmt, ...)                                          \
+  do {                                                                    \
+    std::stringstream ss;                                                 \
+    ss << raft::detail::format("%s:%d ", __FILE__, __LINE__);             \
+    ss << raft::detail::format(fmt, ##__VA_ARGS__);                       \
+    raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \
+  } while (0)
+#else
+#define RAFT_LOG_TRACE(fmt, ...) void(0)
+#endif
+
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE)
+#define RAFT_LOG_TRACE_VEC(ptr, len)                                      \
+  do {                                                                    \
+    std::stringstream ss;                                                 \
+    ss << raft::detail::format("%s:%d ", __FILE__, __LINE__);             \
+    print_vector(#ptr, ptr, len, ss);                                     \
+    raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \
+  } while (0)
+#else
+#define RAFT_LOG_TRACE_VEC(ptr, len) void(0)
+#endif
+
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG)
+#define RAFT_LOG_DEBUG(fmt, ...)                                          \
+  do {                                                                    \
+    std::stringstream ss;                                                 \
+    ss << raft::detail::format("%s:%d ", __FILE__, __LINE__);             \
+    ss << raft::detail::format(fmt, ##__VA_ARGS__);                       \
+    raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_DEBUG, ss.str().c_str()); \
+  } while (0)
+#else
+#define RAFT_LOG_DEBUG(fmt, ...) void(0)
+#endif
+
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_INFO)
+#define RAFT_LOG_INFO(fmt, ...) \
+  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_INFO, fmt, ##__VA_ARGS__)
+#else
+#define RAFT_LOG_INFO(fmt, ...) void(0)
+#endif
+
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_WARN)
+#define RAFT_LOG_WARN(fmt, ...) \
+  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_WARN, fmt, ##__VA_ARGS__)
+#else
+#define RAFT_LOG_WARN(fmt, ...) void(0)
+#endif
+
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_ERROR)
+#define RAFT_LOG_ERROR(fmt, ...) \
+  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_ERROR, fmt, ##__VA_ARGS__)
+#else
+#define RAFT_LOG_ERROR(fmt, ...) void(0)
+#endif
+
+#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_CRITICAL)
+#define RAFT_LOG_CRITICAL(fmt, ...) \
+  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_CRITICAL, fmt, ##__VA_ARGS__)
+#else
+#define RAFT_LOG_CRITICAL(fmt, ...) void(0)
+#endif
+/** @} */
diff --git a/cpp/include/raft/core/logger.hpp b/cpp/include/raft/core/logger.hpp
index 3984ec042a..59968ff5e5 100644
--- a/cpp/include/raft/core/logger.hpp
+++ b/cpp/include/raft/core/logger.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,310 +15,10 @@
  */
 #pragma once
 
-#ifndef __RAFT_RT_LOGGER
-#define __RAFT_RT_LOGGER
+#include "logger-macros.hpp"
 
-#include <stdarg.h>
+#include "logger-ext.hpp"
 
-#include <algorithm>
-
-#include <memory>
-#include <mutex>
-#include <sstream>
-#include <string>
-#include <unordered_map>
-
-#include <stdarg.h>
-
-#define SPDLOG_HEADER_ONLY
-#include <raft/core/detail/callback_sink.hpp>
-#include <raft/util/cudart_utils.hpp>
-#include <spdlog/sinks/stdout_color_sinks.h>  // NOLINT
-#include <spdlog/spdlog.h>                    // NOLINT
-
-/**
- * @defgroup logging levels used in raft
- *
- * @note exactly match the corresponding ones (but reverse in terms of value)
- *       in spdlog for wrapping purposes
- *
- * @{
- */
-#define RAFT_LEVEL_TRACE    6
-#define RAFT_LEVEL_DEBUG    5
-#define RAFT_LEVEL_INFO     4
-#define RAFT_LEVEL_WARN     3
-#define RAFT_LEVEL_ERROR    2
-#define RAFT_LEVEL_CRITICAL 1
-#define RAFT_LEVEL_OFF      0
-/** @} */
-
-#if !defined(RAFT_ACTIVE_LEVEL)
-#define RAFT_ACTIVE_LEVEL RAFT_LEVEL_INFO
+#if !defined(RAFT_COMPILED)
+#include "logger-inl.hpp"
 #endif
-
-namespace raft {
-
-static const std::string RAFT_NAME = "raft";
-static const std::string default_log_pattern("[%L] [%H:%M:%S.%f] %v");
-
-namespace detail {
-
-/**
- * @defgroup CStringFormat Expand a C-style format string
- *
- * @brief Expands C-style formatted string into std::string
- *
- * @param[in] fmt format string
- * @param[in] vl  respective values for each of format modifiers in the string
- *
- * @return the expanded `std::string`
- *
- * @{
- */
-inline std::string format(const char* fmt, va_list& vl)
-{
-  va_list vl_copy;
-  va_copy(vl_copy, vl);
-  int length = std::vsnprintf(nullptr, 0, fmt, vl_copy);
-  assert(length >= 0);
-  std::vector<char> buf(length + 1);
-  std::vsnprintf(buf.data(), length + 1, fmt, vl);
-  return std::string(buf.data());
-}
-
-inline std::string format(const char* fmt, ...)
-{
-  va_list vl;
-  va_start(vl, fmt);
-  std::string str = format(fmt, vl);
-  va_end(vl);
-  return str;
-}
-/** @} */
-
-inline int convert_level_to_spdlog(int level)
-{
-  level = std::max(RAFT_LEVEL_OFF, std::min(RAFT_LEVEL_TRACE, level));
-  return RAFT_LEVEL_TRACE - level;
-}
-
-}  // namespace detail
-
-/**
- * @brief The main Logging class for raft library.
- *
- * This class acts as a thin wrapper over the underlying `spdlog` interface. The
- * design is done in this way in order to avoid us having to also ship `spdlog`
- * header files in our installation.
- *
- * @todo This currently only supports logging to stdout. Need to add support in
- *       future to add custom loggers as well [Issue #2046]
- */
-class logger {
- public:
-  // @todo setting the logger once per process with
-  logger(std::string const& name_ = "")
-    : sink{std::make_shared<spdlog::sinks::callback_sink_mt>()},
-      spdlogger{std::make_shared<spdlog::logger>(name_, sink)},
-      cur_pattern()
-  {
-    set_pattern(default_log_pattern);
-    set_level(RAFT_ACTIVE_LEVEL);
-  }
-  /**
-   * @brief Singleton method to get the underlying logger object
-   *
-   * @return the singleton logger object
-   */
-  static logger& get(std::string const& name = "")
-  {
-    if (log_map.find(name) == log_map.end()) {
-      log_map[name] = std::make_shared<raft::logger>(name);
-    }
-    return *log_map[name];
-  }
-
-  /**
-   * @brief Set the logging level.
-   *
-   * Only messages with level equal or above this will be printed
-   *
-   * @param[in] level logging level
-   *
-   * @note The log level will actually be set only if the input is within the
-   *       range [RAFT_LEVEL_TRACE, RAFT_LEVEL_OFF]. If it is not, then it'll
-   *       be ignored. See documentation of decisiontree for how this gets used
-   */
-  void set_level(int level)
-  {
-    level = raft::detail::convert_level_to_spdlog(level);
-    spdlogger->set_level(static_cast<spdlog::level::level_enum>(level));
-  }
-
-  /**
-   * @brief Set the logging pattern
-   *
-   * @param[in] pattern the pattern to be set. Refer this link
-   *                    https://github.com/gabime/spdlog/wiki/3.-Custom-formatting
-   *                    to know the right syntax of this pattern
-   */
-  void set_pattern(const std::string& pattern)
-  {
-    cur_pattern = pattern;
-    spdlogger->set_pattern(pattern);
-  }
-
-  /**
-   * @brief Register a callback function to be run in place of usual log call
-   *
-   * @param[in] callback the function to be run on all logged messages
-   */
-  void set_callback(void (*callback)(int lvl, const char* msg)) { sink->set_callback(callback); }
-
-  /**
-   * @brief Register a flush function compatible with the registered callback
-   *
-   * @param[in] flush the function to use when flushing logs
-   */
-  void set_flush(void (*flush)()) { sink->set_flush(flush); }
-
-  /**
-   * @brief Tells whether messages will be logged for the given log level
-   *
-   * @param[in] level log level to be checked for
-   * @return true if messages will be logged for this level, else false
-   */
-  bool should_log_for(int level) const
-  {
-    level        = raft::detail::convert_level_to_spdlog(level);
-    auto level_e = static_cast<spdlog::level::level_enum>(level);
-    return spdlogger->should_log(level_e);
-  }
-
-  /**
-   * @brief Query for the current log level
-   *
-   * @return the current log level
-   */
-  int get_level() const
-  {
-    auto level_e = spdlogger->level();
-    return RAFT_LEVEL_TRACE - static_cast<int>(level_e);
-  }
-
-  /**
-   * @brief Get the current logging pattern
-   * @return the pattern
-   */
-  std::string get_pattern() const { return cur_pattern; }
-
-  /**
-   * @brief Main logging method
-   *
-   * @param[in] level logging level of this message
-   * @param[in] fmt   C-like format string, followed by respective params
-   */
-  void log(int level, const char* fmt, ...)
-  {
-    level        = raft::detail::convert_level_to_spdlog(level);
-    auto level_e = static_cast<spdlog::level::level_enum>(level);
-    // explicit check to make sure that we only expand messages when required
-    if (spdlogger->should_log(level_e)) {
-      va_list vl;
-      va_start(vl, fmt);
-      auto msg = raft::detail::format(fmt, vl);
-      va_end(vl);
-      spdlogger->log(level_e, msg);
-    }
-  }
-
-  /**
-   * @brief Flush logs by calling flush on underlying logger
-   */
-  void flush() { spdlogger->flush(); }
-
-  ~logger() {}
-
- private:
-  logger();
-
-  static inline std::unordered_map<std::string, std::shared_ptr<raft::logger>> log_map;
-  std::shared_ptr<spdlog::sinks::callback_sink_mt> sink;
-  std::shared_ptr<spdlog::logger> spdlogger;
-  std::string cur_pattern;
-  int cur_level;
-};  // class logger
-
-};  // namespace raft
-
-/**
- * @defgroup loggerMacros Helper macros for dealing with logging
- * @{
- */
-#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE)
-#define RAFT_LOG_TRACE(fmt, ...)                                          \
-  do {                                                                    \
-    std::stringstream ss;                                                 \
-    ss << raft::detail::format("%s:%d ", __FILE__, __LINE__);             \
-    ss << raft::detail::format(fmt, ##__VA_ARGS__);                       \
-    raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \
-  } while (0)
-#else
-#define RAFT_LOG_TRACE(fmt, ...) void(0)
-#endif
-
-#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_TRACE)
-#define RAFT_LOG_TRACE_VEC(ptr, len)                                      \
-  do {                                                                    \
-    std::stringstream ss;                                                 \
-    ss << raft::detail::format("%s:%d ", __FILE__, __LINE__);             \
-    print_vector(#ptr, ptr, len, ss);                                     \
-    raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_TRACE, ss.str().c_str()); \
-  } while (0)
-#else
-#define RAFT_LOG_TRACE_VEC(ptr, len) void(0)
-#endif
-
-#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG)
-#define RAFT_LOG_DEBUG(fmt, ...)                                          \
-  do {                                                                    \
-    std::stringstream ss;                                                 \
-    ss << raft::detail::format("%s:%d ", __FILE__, __LINE__);             \
-    ss << raft::detail::format(fmt, ##__VA_ARGS__);                       \
-    raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_DEBUG, ss.str().c_str()); \
-  } while (0)
-#else
-#define RAFT_LOG_DEBUG(fmt, ...) void(0)
-#endif
-
-#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_INFO)
-#define RAFT_LOG_INFO(fmt, ...) \
-  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_INFO, fmt, ##__VA_ARGS__)
-#else
-#define RAFT_LOG_INFO(fmt, ...) void(0)
-#endif
-
-#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_WARN)
-#define RAFT_LOG_WARN(fmt, ...) \
-  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_WARN, fmt, ##__VA_ARGS__)
-#else
-#define RAFT_LOG_WARN(fmt, ...) void(0)
-#endif
-
-#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_ERROR)
-#define RAFT_LOG_ERROR(fmt, ...) \
-  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_ERROR, fmt, ##__VA_ARGS__)
-#else
-#define RAFT_LOG_ERROR(fmt, ...) void(0)
-#endif
-
-#if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_CRITICAL)
-#define RAFT_LOG_CRITICAL(fmt, ...) \
-  raft::logger::get(RAFT_NAME).log(RAFT_LEVEL_CRITICAL, fmt, ##__VA_ARGS__)
-#else
-#define RAFT_LOG_CRITICAL(fmt, ...) void(0)
-#endif
-/** @} */
-
-#endif
\ No newline at end of file
diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp
index e1209835c9..618e307f5d 100644
--- a/cpp/include/raft/core/mdarray.hpp
+++ b/cpp/include/raft/core/mdarray.hpp
@@ -25,6 +25,7 @@
 #include <stddef.h>
 
 #include <raft/core/detail/macros.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/core/host_device_accessor.hpp>
 #include <raft/core/mdspan.hpp>
 #include <raft/core/mdspan_types.hpp>
diff --git a/cpp/include/raft/core/resource/device_memory_resource.hpp b/cpp/include/raft/core/resource/device_memory_resource.hpp
index 35ae3d715f..ebc41e0f8e 100644
--- a/cpp/include/raft/core/resource/device_memory_resource.hpp
+++ b/cpp/include/raft/core/resource/device_memory_resource.hpp
@@ -18,6 +18,7 @@
 #include <raft/core/resource/resource_types.hpp>
 #include <raft/core/resources.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
 
 namespace raft::resource {
 class device_memory_resource : public resource {
@@ -72,4 +73,4 @@ inline void set_workspace_resource(resources const& res, rmm::mr::device_memory_
 {
   res.add_resource_factory(std::make_shared<workspace_resource_factory>(mr));
 };
-}  // namespace raft::resource
\ No newline at end of file
+}  // namespace raft::resource
diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp
index 4de7d43e76..e0f51b61b4 100644
--- a/cpp/include/raft/core/resources.hpp
+++ b/cpp/include/raft/core/resources.hpp
@@ -18,6 +18,7 @@
 #include "resource/resource_types.hpp"
 #include <algorithm>
 #include <mutex>
+#include <raft/core/error.hpp>  // RAFT_EXPECTS
 #include <raft/core/logger.hpp>
 #include <string>
 #include <vector>
@@ -128,4 +129,4 @@ class resources {
   mutable std::vector<pair_res_factory> factories_;
   mutable std::vector<pair_resource> resources_;
 };
-}  // namespace raft
\ No newline at end of file
+}  // namespace raft
diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
index 4b000add21..7ff886c677 100644
--- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
+++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
@@ -17,11 +17,12 @@
 #pragma once
 
 #include "gram_matrix.cuh"
-#include <raft/util/cuda_utils.cuh>
 
+#include <raft/distance/detail/kernels/rbf_fin_op.cuh>
 #include <raft/distance/distance.cuh>
 #include <raft/linalg/gemm.cuh>
 #include <raft/sparse/linalg/norm.cuh>
+#include <raft/util/cuda_utils.cuh>
 
 namespace raft::distance::kernels::detail {
 
@@ -718,7 +719,7 @@ class RBFKernel : public GramMatrixBase<math_t> {
     math_t gain   = this->gain;
     using index_t = int64_t;
 
-    auto fin_op = [gain] __device__(math_t d_val, index_t idx) { return exp(-gain * d_val); };
+    rbf_fin_op fin_op{gain};
     raft::distance::distance<raft::distance::DistanceType::L2Unexpanded,
                              math_t,
                              math_t,
diff --git a/cpp/include/raft/distance/detail/kernels/rbf_fin_op.cuh b/cpp/include/raft/distance/detail/kernels/rbf_fin_op.cuh
new file mode 100644
index 0000000000..cd19675477
--- /dev/null
+++ b/cpp/include/raft/distance/detail/kernels/rbf_fin_op.cuh
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+/*
+ * This file defines rbf_fin_op, which is used by RBFKernel (a GramMatrixBase subclass).
+ *
+ * This struct has been moved to a separate file, so that it is cheap to include
+ * in distance/distance-ext.cuh, where an instance of raft::distance::distance
+ * with the rbf_fin_op is instantiated.
+ *
+ */
+
+#include <raft/core/math.hpp>                 // raft::exp
+#include <raft/util/cuda_dev_essentials.cuh>  // HD
+
+namespace raft::distance::kernels::detail {
+
+/** @brief: Final op for Gram matrix with RBF kernel.
+ *
+ * Calculates output = e^(-gain * in)
+ *
+ */
+template <typename OutT>
+struct rbf_fin_op {
+  OutT gain;
+
+  explicit HD rbf_fin_op(OutT gain_) noexcept : gain(gain_) {}
+
+  template <typename... Args>
+  HDI OutT operator()(OutT d_val, Args... unused_args)
+  {
+    return raft::exp(-gain * d_val);
+  }
+};  // struct rbf_fin_op
+
+}  // namespace raft::distance::kernels::detail
diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh
new file mode 100644
index 0000000000..dd58ab4328
--- /dev/null
+++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/core/operators.hpp>                          // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>    // ops::*
+#include <raft/distance/detail/distance_ops/cutlass.cuh>    // ops::has_cutlass_op
+#include <raft/distance/detail/kernels/rbf_fin_op.cuh>      // rbf_fin_op
+#include <raft/distance/detail/pairwise_matrix/params.cuh>  // pairwise_matrix_params
+#include <raft/util/raft_explicit.hpp>                      // RAFT_EXPLICIT
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::distance::detail {
+// Declares pairwise_matrix_dispatch; RAFT_EXPLICIT (raft/util/raft_explicit.hpp) ends the declaration.
+template <typename OpT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename FinOpT,
+          typename IdxT = int>
+void pairwise_matrix_dispatch(OpT distance_op,
+                              IdxT m,
+                              IdxT n,
+                              IdxT k,
+                              const DataT* x,
+                              const DataT* y,
+                              const DataT* x_norm,
+                              const DataT* y_norm,
+                              OutT* out,
+                              FinOpT fin_op,
+                              cudaStream_t stream,
+                              bool is_row_major) RAFT_EXPLICIT;
+
+};      // namespace raft::distance::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+// Declares one extern template instance of pairwise_matrix_dispatch for OpT<DataT, AccT, IdxT>.
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  extern template void raft::distance::detail::                                        \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+
+/*
+ * Hierarchy of instantiations:
+ *
+ * This file defines extern template instantiations of the distance kernels. The
+ * instantiation of the public API is handled in raft/distance/distance-ext.cuh.
+ *
+ * After adding an instance here, make sure to also add the instance there.
+ */
+
+// The following two instances (l2_unexp_distance_op with rbf_fin_op) are used in the RBF kernel
+// object. Note the use of int64_t for the index type.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op,
+  float,
+  float,
+  float,
+  raft::distance::kernels::detail::rbf_fin_op<float>,
+  int64_t);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op,
+  double,
+  double,
+  double,
+  raft::distance::kernels::detail::rbf_fin_op<double>,
+  int64_t);
+
+// Remaining instances: each distance op for float and double, with raft::identity_op and int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::canberra_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::canberra_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::correlation_distance_op,
+  float,
+  float,
+  float,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::correlation_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::cosine_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::cosine_distance_op, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hamming_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hamming_distance_op, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hellinger_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hellinger_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::jensen_shannon_distance_op,
+  float,
+  float,
+  float,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::jensen_shannon_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::kl_divergence_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::kl_divergence_op, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l1_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l1_distance_op, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_exp_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_exp_distance_op, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l_inf_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l_inf_distance_op, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::lp_unexp_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::lp_unexp_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::russel_rao_distance_op, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::russel_rao_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh
new file mode 100644
index 0000000000..bb4422735b
--- /dev/null
+++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+/* This file has two responsibilities:
+ *
+ * 1. Dispatch to the correct implementation of a kernel based on the
+ *    architecture of the device on which the kernel will be launched. For
+ *    instance, the cosine distance has a CUTLASS-based implementation that can
+ *    be used on SM80+ and the normal implementation that is used on older
+ *    architectures.
+ *
+ * 2. Provide concise function templates that can be instantiated in
+ *    src/distance/detail/pairwise_matrix/. Previously,
+ *    raft::distance::detail::distance was instantiated. The function
+ *    necessarily required a large set of include files, which slowed down the
+ *    build. The raft::distance::detail::pairwise_matrix_dispatch function
+ *    does not require as large a set of include files, which speeds up the build.
+ */
+
+#include <raft/distance/detail/distance_ops/cutlass.cuh>           // ops::has_cutlass_op
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>  // dispatch_sm60
+#include <raft/distance/detail/pairwise_matrix/params.cuh>         // pairwise_matrix_params
+#include <raft/util/arch.cuh>                                      // raft::util::arch::SM_*
+
+// NOTE: to minimize compile times, we do not include dispatch_sm80.cuh.
+// Including dispatch_sm80.cuh can slow down compile times (due to CUTLASS).
+// Therefore, it is the including file's responsibility to include the correct
+// dispatch_smXX.cuh headers, as is done in raft/distance/detail/distance.cuh
+// and src/distance/detail/pairwise_matrix/dispatch_*.cu.
+
+namespace raft::distance::detail {
+
+// Forward declaration of the SM80 (CUTLASS) dispatch, so that dispatch_sm80.cuh
+// does not have to be included when it is never called. This makes compiling
+// all the non-CUTLASS based distance instantiations faster. For CUTLASS-based
+// distances, dispatch_sm80.cuh has to be included by the file including this
+// file. Arguments, in order: distance op, kernel params, SM range, stream.
+template <typename OpT,
+          typename IdxT,
+          typename DataT,
+          typename OutT,
+          typename FinOpT,
+          typename SM_compat_t>
+void pairwise_matrix_sm80_dispatch(OpT,
+                                   pairwise_matrix_params<IdxT, DataT, OutT, FinOpT>,
+                                   SM_compat_t,
+                                   cudaStream_t);
+// Builds pairwise_matrix_params from the raw arguments and dispatches to the SM60 or SM80 path.
+template <typename OpT,
+          typename DataT,
+          typename AccT,  // not referenced in the function body
+          typename OutT,
+          typename FinOpT,
+          typename IdxT = int>
+void pairwise_matrix_dispatch(OpT distance_op,
+                              IdxT m,
+                              IdxT n,
+                              IdxT k,
+                              const DataT* x,
+                              const DataT* y,
+                              const DataT* x_norm,
+                              const DataT* y_norm,
+                              OutT* out,
+                              FinOpT fin_op,
+                              cudaStream_t stream,
+                              bool is_row_major)
+{
+  // Create kernel parameter struct. Flip x and y if column major.
+  IdxT ldx    = is_row_major ? k : m;
+  IdxT ldy    = is_row_major ? k : n;
+  IdxT ld_out = is_row_major ? n : m;
+
+  pairwise_matrix_params<IdxT, DataT, OutT, FinOpT> params{
+    m, n, k, ldx, ldy, ld_out, x, y, x_norm, y_norm, out, fin_op, is_row_major};
+
+  if (!params.is_row_major) { params.flip_x_and_y(); }
+
+  // On CUDA 12, or when ops::has_cutlass_op<OpT>() is false:
+  // - always execute normal kernel
+  //
+  // Otherwise (toolkit major version is not 12 and OpT has a CUTLASS op):
+  // - execute CUTLASS-based kernel on SM_80 and above
+  // - execute normal kernel below SM_80
+  namespace arch = raft::util::arch;
+
+  constexpr bool is_ctk_12              = __CUDACC_VER_MAJOR__ == 12;
+  constexpr bool cutlass_op_unavailable = !ops::has_cutlass_op<OpT>();
+
+  if constexpr (is_ctk_12 || cutlass_op_unavailable) {
+    // Legacy kernels only: compiled with CUDA 12, or OpT has no CUTLASS op.
+    auto any_range = arch::SM_range(arch::SM_min(), arch::SM_future());
+    pairwise_matrix_sm60_dispatch(distance_op, params, any_range, stream);
+  } else {
+    auto cutlass_range = arch::SM_range(arch::SM_80(), arch::SM_future());
+    auto legacy_range  = arch::SM_range(arch::SM_min(), arch::SM_80());
+
+    // Get pointer to SM60 kernel to determine the runtime architecture of the
+    // current system. Other methods to determine the architecture (that do not
+    // require a pointer) can be error prone. See:
+    // https://github.com/NVIDIA/cub/issues/545
+    auto sm60_wrapper = pairwise_matrix_sm60_get_wrapper(distance_op, params, legacy_range);
+    void* kernel_ptr  = reinterpret_cast<void*>(sm60_wrapper.kernel_ptr);
+    auto runtime_arch = arch::kernel_runtime_arch(kernel_ptr);
+
+    if (cutlass_range.contains(runtime_arch)) {
+      // If device is SM_80 or later, use CUTLASS-based kernel.
+      pairwise_matrix_sm80_dispatch(distance_op, params, cutlass_range, stream);
+    } else {
+      // Reuse kernel wrapper that we obtained above. This avoids performing the
+      // dispatch twice.
+      sm60_wrapper.launch(distance_op, params, stream);
+    }
+  }
+}
+
+};  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch.cuh
index e04b56ee8a..4a52b7ebe7 100644
--- a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch.cuh
+++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch.cuh
@@ -15,123 +15,10 @@
  */
 #pragma once
 
-/* This file has two responsibilities:
- *
- * 1. Dispatch to the correct implementation of a kernel based on the
- *    architecture of the device on which the kernel will be launched. For
- *    instance, the cosine distance has a CUTLASS-based implementation that can
- *    be used on SM80+ and the normal implementation that is used on older
- *    architectures.
- *
- * 2. Provide concise function templates that can be instantiated in
- *    src/distance/distance/specializations/detail/. Previously,
- *    raft::distance::detail::distance was instantiated. The function
- *    necessarily required a large set of include files, which slowed down the
- *    build. The raft::distance::detail::pairwise_matrix_arch_dispatch functions
- *    do not require as large an include files set, which speeds up the build.
- */
-
-#include <raft/distance/detail/distance_ops/cutlass.cuh>           // ops::has_cutlass_op
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>  // dispatch_sm60
-#include <raft/distance/detail/pairwise_matrix/params.cuh>         // pairwise_matrix_params
-#include <raft/util/arch.cuh>                                      // raft::util::arch::SM_*
-
-// NOTE: to minimize compile times, we do not include dispatch_sm80.cuh.
-// Including dispatch_sm80.cuh can slow down compile times (due to CUTLASS).
-// Therefore, it is the including file's responsibility to include the correct
-// dispatch_smXX.cuh headers, as is done in raft/distance/detail/distance.cuh
-// and the specializations in src/distance/distance/specializations/detail/.
-
-namespace raft::distance::detail {
-
-// This forward-declaration ensures that we do not need to include
-// dispatch_sm80.cuh if we are not calling it in practice. This makes compiling
-// all the non-CUTLASS based distance specializations faster. For CUTLASS-based
-// distances, dispatch_sm80.cuh has to be included by the file including this
-// file.
-template <typename OpT,
-          typename IdxT,
-          typename DataT,
-          typename OutT,
-          typename FinOpT,
-          typename SM_compat_t>
-void pairwise_matrix_sm80_dispatch(OpT,
-                                   pairwise_matrix_params<IdxT, DataT, OutT, FinOpT>,
-                                   SM_compat_t,
-                                   cudaStream_t);
-
-template <typename OpT, typename IdxT, typename DataT, typename OutT, typename FinOpT>
-void pairwise_matrix_instantiation_point(OpT distance_op,
-                                         pairwise_matrix_params<IdxT, DataT, OutT, FinOpT> params,
-                                         cudaStream_t stream)
-{
-  // On CUDA 12:
-  // - always execute normal kernel
-  //
-  // On CUDA 11 and below:
-  // - execute CUTLASS-based kernel on SM_80 and above
-  // - execute normal kernel below SM_80
-  namespace arch = raft::util::arch;
-
-  constexpr bool is_ctk_12              = __CUDACC_VER_MAJOR__ == 12;
-  constexpr bool cutlass_op_unavailable = !ops::has_cutlass_op<OpT>();
-
-  if constexpr (is_ctk_12 || cutlass_op_unavailable) {
-    // Always execute legacy kernels on CUDA 12
-    auto any_range = arch::SM_range(arch::SM_min(), arch::SM_future());
-    pairwise_matrix_sm60_dispatch(distance_op, params, any_range, stream);
-  } else {
-    auto cutlass_range = arch::SM_range(arch::SM_80(), arch::SM_future());
-    auto legacy_range  = arch::SM_range(arch::SM_min(), arch::SM_80());
-
-    // Get pointer to SM60 kernel to determine the runtime architecture of the
-    // current system. Other methods to determine the architecture (that do not
-    // require a pointer) can be error prone. See:
-    // https://github.com/NVIDIA/cub/issues/545
-    auto sm60_wrapper = pairwise_matrix_sm60_get_wrapper(distance_op, params, legacy_range);
-    void* kernel_ptr  = reinterpret_cast<void*>(sm60_wrapper.kernel_ptr);
-    auto runtime_arch = arch::kernel_runtime_arch(kernel_ptr);
-
-    if (cutlass_range.contains(runtime_arch)) {
-      // If device is SM_80 or later, use CUTLASS-based kernel.
-      pairwise_matrix_sm80_dispatch(distance_op, params, cutlass_range, stream);
-    } else {
-      // Reuse kernel wrapper that we obtained above. This avoids performing the
-      // dispatch twice.
-      sm60_wrapper.launch(distance_op, params, stream);
-    }
-  }
-}
-
-template <typename OpT,
-          typename DataT,
-          typename AccT,
-          typename OutT,
-          typename FinOpT,
-          typename IdxT = int>
-void pairwise_matrix_dispatch(OpT distance_op,
-                              IdxT m,
-                              IdxT n,
-                              IdxT k,
-                              const DataT* x,
-                              const DataT* y,
-                              const DataT* x_norm,
-                              const DataT* y_norm,
-                              OutT* out,
-                              FinOpT fin_op,
-                              cudaStream_t stream,
-                              bool is_row_major)
-{
-  // Create kernel parameter struct. Flip x and y if column major.
-  IdxT ldx    = is_row_major ? k : m;
-  IdxT ldy    = is_row_major ? k : n;
-  IdxT ld_out = is_row_major ? n : m;
-
-  pairwise_matrix_params<IdxT, DataT, OutT, FinOpT> params{
-    m, n, k, ldx, ldy, ld_out, x, y, x_norm, y_norm, out, fin_op, is_row_major};
-
-  if (!params.is_row_major) { params.flip_x_and_y(); }
-  pairwise_matrix_instantiation_point(distance_op, params, stream);
-}
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "dispatch-inl.cuh"
+#endif
 
-};  // namespace raft::distance::detail
+#ifdef RAFT_COMPILED
+#include "dispatch-ext.cuh"
+#endif
diff --git a/cpp/include/raft/distance/distance-ext.cuh b/cpp/include/raft/distance/distance-ext.cuh
new file mode 100644
index 0000000000..3f7f2b0a23
--- /dev/null
+++ b/cpp/include/raft/distance/distance-ext.cuh
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/core/device_mdspan.hpp>                  // raft::device_matrix_view
+#include <raft/core/operators.hpp>                      // raft::identity_op
+#include <raft/core/resources.hpp>                      // raft::resources
+#include <raft/distance/detail/kernels/rbf_fin_op.cuh>  // rbf_fin_op
+#include <raft/distance/distance_types.hpp>             // raft::distance::DistanceType
+#include <raft/util/raft_explicit.hpp>                  // RAFT_EXPLICIT
+#include <rmm/device_uvector.hpp>                       // rmm::device_uvector
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft {
+namespace distance {
+// distance: raw-pointer inputs, caller-provided workspace, and a user-supplied final op.
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename FinalLambda,
+          typename IdxT = int>
+void distance(raft::resources const& handle,
+              const DataT* x,
+              const DataT* y,
+              OutT* dist,
+              IdxT m,
+              IdxT n,
+              IdxT k,
+              void* workspace,
+              size_t worksize,
+              FinalLambda fin_op,
+              bool isRowMajor  = true,
+              DataT metric_arg = 2.0f) RAFT_EXPLICIT;
+// distance: raw-pointer inputs and caller-provided workspace; no final-op parameter.
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int>
+void distance(raft::resources const& handle,
+              const DataT* x,
+              const DataT* y,
+              OutT* dist,
+              IdxT m,
+              IdxT n,
+              IdxT k,
+              void* workspace,
+              size_t worksize,
+              bool isRowMajor  = true,
+              DataT metric_arg = 2.0f) RAFT_EXPLICIT;
+// getWorkspaceSize: raw-pointer overload; returns a size_t for the given x, y, m, n, k.
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int>
+size_t getWorkspaceSize(const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k) RAFT_EXPLICIT;
+// getWorkspaceSize: device_matrix_view overload.
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int,
+          typename layout>
+size_t getWorkspaceSize(raft::device_matrix_view<DataT, IdxT, layout> const& x,
+                        raft::device_matrix_view<DataT, IdxT, layout> const& y) RAFT_EXPLICIT;
+// distance: raw-pointer inputs; no workspace or final-op parameter.
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int>
+void distance(raft::resources const& handle,
+              const DataT* x,
+              const DataT* y,
+              OutT* dist,
+              IdxT m,
+              IdxT n,
+              IdxT k,
+              bool isRowMajor  = true,
+              DataT metric_arg = 2.0f) RAFT_EXPLICIT;
+// pairwise_distance: metric passed as an argument; workspace is an rmm::device_uvector<char>.
+template <typename Type, typename IdxT = int>
+void pairwise_distance(raft::resources const& handle,
+                       const Type* x,
+                       const Type* y,
+                       Type* dist,
+                       IdxT m,
+                       IdxT n,
+                       IdxT k,
+                       rmm::device_uvector<char>& workspace,
+                       raft::distance::DistanceType metric,
+                       bool isRowMajor = true,
+                       Type metric_arg = 2.0f) RAFT_EXPLICIT;
+// pairwise_distance: metric passed as an argument; no workspace parameter.
+template <typename Type, typename IdxT = int>
+void pairwise_distance(raft::resources const& handle,
+                       const Type* x,
+                       const Type* y,
+                       Type* dist,
+                       IdxT m,
+                       IdxT n,
+                       IdxT k,
+                       raft::distance::DistanceType metric,
+                       bool isRowMajor = true,
+                       Type metric_arg = 2.0f) RAFT_EXPLICIT;
+// distance: device_matrix_view inputs and output; metric fixed by the DistT template argument.
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename layout = raft::layout_c_contiguous,
+          typename IdxT   = int>
+void distance(raft::resources const& handle,
+              raft::device_matrix_view<DataT, IdxT, layout> const x,
+              raft::device_matrix_view<DataT, IdxT, layout> const y,
+              raft::device_matrix_view<OutT, IdxT, layout> dist,
+              DataT metric_arg = 2.0f) RAFT_EXPLICIT;
+// pairwise_distance: device_matrix_view inputs and output; metric passed as an argument.
+template <typename Type, typename layout = layout_c_contiguous, typename IdxT = int>
+void pairwise_distance(raft::resources const& handle,
+                       device_matrix_view<Type, IdxT, layout> const x,
+                       device_matrix_view<Type, IdxT, layout> const y,
+                       device_matrix_view<Type, IdxT, layout> dist,
+                       raft::distance::DistanceType metric,
+                       Type metric_arg = 2.0f) RAFT_EXPLICIT;
+
+};      // namespace distance
+};      // namespace raft
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+/*
+ * Hierarchy of instantiations:
+ *
+ * This file defines the extern template instantiations for the public API of
+ * raft::distance. To improve compile times, the extern template instantiation
+ * of the distance kernels is handled in
+ * distance/detail/pairwise_matrix/dispatch-ext.cuh.
+ *
+ * After adding an instance here, make sure to also add the instance to
+ * dispatch-ext.cuh and the corresponding .cu files.
+ */
+// Declares one extern template instance of the distance() overload that takes a FinalLambda.
+#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, FinalLambda, IdxT)       \
+  extern template void raft::distance::distance<DT, DataT, AccT, OutT, FinalLambda, IdxT>( \
+    raft::resources const& handle,                                                         \
+    const DataT* x,                                                                        \
+    const DataT* y,                                                                        \
+    OutT* dist,                                                                            \
+    IdxT m,                                                                                \
+    IdxT n,                                                                                \
+    IdxT k,                                                                                \
+    void* workspace,                                                                       \
+    size_t worksize,                                                                       \
+    FinalLambda fin_op,                                                                    \
+    bool isRowMajor,                                                                       \
+    DataT metric_arg)
+
+// The following two instances are used in test/distance/gram.cu. Note the use
+// of int64_t for the index type.
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::distance::kernels::detail::rbf_fin_op<float>,
+                                   int64_t);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::distance::kernels::detail::rbf_fin_op<double>,
+                                   int64_t);
+// Remaining instances: each distance type for float and double, with raft::identity_op and int.
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::identity_op,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_distance
+
+// Same, but without raft::identity_op: the raw-pointer overload that takes workspace and worksize.
+#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT)       \
+  extern template void raft::distance::distance<DT, DataT, AccT, OutT, IdxT>( \
+    raft::resources const& handle,                                            \
+    const DataT* x,                                                           \
+    const DataT* y,                                                           \
+    OutT* dist,                                                               \
+    IdxT m,                                                                   \
+    IdxT n,                                                                   \
+    IdxT k,                                                                   \
+    void* workspace,                                                          \
+    size_t worksize,                                                          \
+    bool isRowMajor,                                                          \
+    DataT metric_arg)
+
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, int);
+
+#undef instantiate_raft_distance_distance
+
+// Same, but without workspace: this overload omits the workspace and worksize parameters.
+#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT)       \
+  extern template void raft::distance::distance<DT, DataT, AccT, OutT, IdxT>( \
+    raft::resources const& handle,                                            \
+    const DataT* x,                                                           \
+    const DataT* y,                                                           \
+    OutT* dist,                                                               \
+    IdxT m,                                                                   \
+    IdxT n,                                                                   \
+    IdxT k,                                                                   \
+    bool isRowMajor,                                                          \
+    DataT metric_arg)
+
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, int);
+
+#undef instantiate_raft_distance_distance
+
+#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT)         \
+  extern template size_t raft::distance::getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT>( \
+    const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k)
+// Raw-pointer getWorkspaceSize overload, declared for float and double data with int indices.
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CorrelationExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Linf, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Linf, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, int);
+
+#undef instantiate_raft_distance_getWorkspaceSize
+
+#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT, layout)         \
+  extern template size_t raft::distance::getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT, layout>( \
+    raft::device_matrix_view<DataT, IdxT, layout> const& x,                                        \
+    raft::device_matrix_view<DataT, IdxT, layout> const& y)
+
+// device_matrix_view overload. We could consider not taking template parameters for this
+// function: the number of instantiations seems a bit excessive.
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, double, double, double, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, double, double, double, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::InnerProduct,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::InnerProduct,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::JensenShannon,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::JensenShannon,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::KLDivergence,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::KLDivergence,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, double, double, double, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, double, double, double, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2Unexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_f_contiguous);
+// NOTE(review): unlike the lists above, this one ends at L2Unexpanded/float/layout_f -- confirm.
+#undef instantiate_raft_distance_getWorkspaceSize
+
+#define instantiate_raft_distance_pairwise_distance(DataT, IdxT)                               \
+  extern template void raft::distance::pairwise_distance(raft::resources const& handle,        \
+                                                         const DataT* x,                       \
+                                                         const DataT* y,                       \
+                                                         DataT* dist,                          \
+                                                         IdxT m,                               \
+                                                         IdxT n,                               \
+                                                         IdxT k,                               \
+                                                         rmm::device_uvector<char>& workspace, \
+                                                         raft::distance::DistanceType metric,  \
+                                                         bool isRowMajor,                      \
+                                                         DataT metric_arg)
+// Raw-pointer pairwise_distance overload with an rmm::device_uvector<char> workspace argument.
+instantiate_raft_distance_pairwise_distance(float, int);
+instantiate_raft_distance_pairwise_distance(double, int);
+
+#undef instantiate_raft_distance_pairwise_distance
+
+// Same, but without workspace: this overload has no rmm::device_uvector<char>& parameter.
+#define instantiate_raft_distance_pairwise_distance(DataT, IdxT)                              \
+  extern template void raft::distance::pairwise_distance(raft::resources const& handle,       \
+                                                         const DataT* x,                      \
+                                                         const DataT* y,                      \
+                                                         DataT* dist,                         \
+                                                         IdxT m,                              \
+                                                         IdxT n,                              \
+                                                         IdxT k,                              \
+                                                         raft::distance::DistanceType metric, \
+                                                         bool isRowMajor,                     \
+                                                         DataT metric_arg)
+
+instantiate_raft_distance_pairwise_distance(float, int);
+instantiate_raft_distance_pairwise_distance(double, int);
+
+#undef instantiate_raft_distance_pairwise_distance
+
+// Version with mdspan: extern template declarations for distance() on device_matrix_view inputs
+#define instantiate_raft_distance_distance(DistT, DataT, AccT, OutT, layout, IdxT)       \
+  extern template void raft::distance::distance<DistT, DataT, AccT, OutT, layout, IdxT>( \
+    raft::resources const& handle,                                                       \
+    raft::device_matrix_view<DataT, IdxT, layout> const x,                               \
+    raft::device_matrix_view<DataT, IdxT, layout> const y,                               \
+    raft::device_matrix_view<OutT, IdxT, layout> dist,                                   \
+    DataT metric_arg)
+
+// Again, we might want to consider reigning in the number of instantiations...
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::InnerProduct,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::InnerProduct,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::JensenShannon,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::JensenShannon,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::KLDivergence,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::KLDivergence,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::LpUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::LpUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+
+#undef instantiate_raft_distance_distance
+// pairwise_distance version with mdspan: the metric is a runtime argument, not a template one
+#define instantiate_raft_distance_pairwise_distance(DataT, layout, IdxT) \
+  extern template void raft::distance::pairwise_distance(                \
+    raft::resources const& handle,                                       \
+    raft::device_matrix_view<DataT, IdxT, layout> const x,               \
+    raft::device_matrix_view<DataT, IdxT, layout> const y,               \
+    raft::device_matrix_view<DataT, IdxT, layout> dist,                  \
+    raft::distance::DistanceType metric,                                 \
+    DataT metric_arg)
+
+instantiate_raft_distance_pairwise_distance(float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_pairwise_distance(float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_pairwise_distance(double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_pairwise_distance(double, raft::layout_f_contiguous, int);
+
+#undef instantiate_raft_distance_pairwise_distance
diff --git a/cpp/include/raft/distance/distance-inl.cuh b/cpp/include/raft/distance/distance-inl.cuh
new file mode 100644
index 0000000000..3399443765
--- /dev/null
+++ b/cpp/include/raft/distance/distance-inl.cuh
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/distance/detail/distance.cuh>
+#include <raft/distance/distance_types.hpp>
+#include <rmm/device_uvector.hpp>
+#include <type_traits>
+
+#include <raft/core/device_mdspan.hpp>
+
+namespace raft {
+namespace distance {
+
+/**
+ * @defgroup pairwise_distance pointer-based pairwise distance prims
+ * @{
+ */
+
+/**
+ * @brief Evaluate pairwise distances with the user epilogue lambda allowed
+ * @tparam DistT which distance to evaluate
+ * @tparam DataT input argument type
+ * @tparam AccT accumulation type
+ * @tparam OutT output type
+ * @tparam FinalLambda user-defined epilogue lambda
+ * @tparam IdxT Index type
+ * @param handle raft handle for managing expensive resources
+ * @param x first set of points
+ * @param y second set of points
+ * @param dist output distance matrix
+ * @param m number of points in x
+ * @param n number of points in y
+ * @param k dimensionality
+ * @param workspace temporary workspace needed for computations
+ * @param worksize number of bytes of the workspace
+ * @param fin_op the final gemm epilogue lambda
+ * @param isRowMajor whether the matrices are row-major or col-major
+ * @param metric_arg metric argument (used for Minkowski distance)
+ *
+ * @note fin_op: This is a device lambda which is supposed to operate upon the
+ * input which is AccT and returns the output in OutT. Its signature is
+ * as follows:  <pre>OutT fin_op(AccT in, int g_idx);</pre>. If one needs
+ * any other parameters, feel free to pass them via closure.
+ */
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename FinalLambda,
+          typename IdxT = int>
+void distance(raft::resources const& handle,
+              const DataT* x,
+              const DataT* y,
+              OutT* dist,
+              IdxT m,
+              IdxT n,
+              IdxT k,
+              void* workspace,
+              size_t worksize,
+              FinalLambda fin_op,
+              bool isRowMajor  = true,
+              DataT metric_arg = 2.0f)
+{
+  detail::distance<DistT, DataT, AccT, OutT, FinalLambda, IdxT>(
+    handle, x, y, dist, m, n, k, workspace, worksize, fin_op, isRowMajor, metric_arg);
+}
+
+/**
+ * @brief Evaluate pairwise distances without an epilogue, using a caller-provided workspace
+ * @tparam DistT which distance to evaluate
+ * @tparam DataT input argument type
+ * @tparam AccT accumulation type
+ * @tparam OutT output type
+ * @tparam IdxT Index type
+ * @param handle raft handle for managing expensive resources
+ * @param x first set of points
+ * @param y second set of points
+ * @param dist output distance matrix
+ * @param m number of points in x
+ * @param n number of points in y
+ * @param k dimensionality
+ * @param workspace temporary workspace needed for computations
+ * @param worksize number of bytes of the workspace
+ * @param isRowMajor whether the matrices are row-major or col-major
+ * @param metric_arg metric argument (used for Minkowski distance)
+ */
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int>
+void distance(raft::resources const& handle,
+              const DataT* x,
+              const DataT* y,
+              OutT* dist,
+              IdxT m,
+              IdxT n,
+              IdxT k,
+              void* workspace,
+              size_t worksize,
+              bool isRowMajor  = true,
+              DataT metric_arg = 2.0f)
+{
+  detail::distance<DistT, DataT, AccT, OutT, IdxT>(
+    handle, x, y, dist, m, n, k, workspace, worksize, isRowMajor, metric_arg);
+}
+
+/**
+ * @brief Return the exact workspace size to compute the distance
+ * @tparam DistT which distance to evaluate
+ * @tparam DataT input argument type
+ * @tparam AccT accumulation type
+ * @tparam OutT output type
+ * @tparam IdxT Index type
+ * @param x first set of points
+ * @param y second set of points
+ * @param m number of points in x
+ * @param n number of points in y
+ * @param k dimensionality
+ * @return number of bytes needed in workspace
+ *
+ * @note If the specified DistT doesn't need the workspace at all, it returns 0.
+ */
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int>
+size_t getWorkspaceSize(const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k)
+{
+  return detail::getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT>(x, y, m, n, k);
+}
+
+/**
+ * @brief Return the exact workspace size to compute the distance
+ * @tparam DistT which distance to evaluate
+ * @tparam DataT input argument type
+ * @tparam AccT accumulation type
+ * @tparam OutT output type
+ * @tparam IdxT Index type
+ * @tparam layout memory layout of the input matrix views
+ * @param x first set of points (size m*k)
+ * @param y second set of points (size n*k); must have the same number of columns as x
+ * @return number of bytes needed in workspace
+ *
+ * @note If the specified DistT doesn't need the workspace at all, it returns 0.
+ */
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int,
+          typename layout>
+size_t getWorkspaceSize(raft::device_matrix_view<DataT, IdxT, layout> const& x,
+                        raft::device_matrix_view<DataT, IdxT, layout> const& y)
+{
+  RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal.");
+
+  return getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT>(
+    x.data_handle(), y.data_handle(), x.extent(0), y.extent(0), x.extent(1));
+}
+
+/**
+ * @brief Evaluate pairwise distances for the simple use case; the workspace is allocated internally
+ * @tparam DistT which distance to evaluate
+ * @tparam DataT input argument type
+ * @tparam AccT accumulation type
+ * @tparam OutT output type
+ * @tparam IdxT Index type
+ * @param handle raft handle for managing expensive resources
+ * @param x first set of points
+ * @param y second set of points
+ * @param dist output distance matrix
+ * @param m number of points in x
+ * @param n number of points in y
+ * @param k dimensionality
+ * @param isRowMajor whether the matrices are row-major or col-major
+ * @param metric_arg metric argument (used for Minkowski distance)
+ */
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT = int>
+void distance(raft::resources const& handle,
+              const DataT* x,
+              const DataT* y,
+              OutT* dist,
+              IdxT m,
+              IdxT n,
+              IdxT k,
+              bool isRowMajor  = true,
+              DataT metric_arg = 2.0f)
+{
+  auto stream = raft::resource::get_cuda_stream(handle);
+  rmm::device_uvector<char> workspace(0, stream);  // starts empty; resized below
+  auto worksize = getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT>(x, y, m, n, k);
+  workspace.resize(worksize, stream);
+  detail::distance<DistT, DataT, AccT, OutT, IdxT>(
+    handle, x, y, dist, m, n, k, workspace.data(), worksize, isRowMajor, metric_arg);
+}
+
+/**
+ * @brief Convenience wrapper around 'distance' prim to convert runtime metric
+ * into compile time for the purpose of dispatch
+ * @tparam Type input/accumulation/output data-type
+ * @tparam IdxT indexing type
+ * @param handle raft handle for managing expensive resources
+ * @param x first set of points
+ * @param y second set of points
+ * @param dist output distance matrix
+ * @param m number of points in x
+ * @param n number of points in y
+ * @param k dimensionality
+ * @param workspace temporary workspace buffer; resized to the workspace size the metric needs
+ * @param metric distance metric
+ * @param isRowMajor whether the matrices are row-major or col-major
+ * @param metric_arg metric argument (used for Minkowski distance)
+ * @note A metric without a case in the switch below is rejected through THROW.
+ */
+template <typename Type, typename IdxT = int>
+void pairwise_distance(raft::resources const& handle,
+                       const Type* x,
+                       const Type* y,
+                       Type* dist,
+                       IdxT m,
+                       IdxT n,
+                       IdxT k,
+                       rmm::device_uvector<char>& workspace,
+                       raft::distance::DistanceType metric,
+                       bool isRowMajor = true,
+                       Type metric_arg = 2.0f)
+{
+  cudaStream_t stream = raft::resource::get_cuda_stream(handle);
+
+  auto dispatch = [&](auto distance_type) {  // distance_type: std::integral_constant metric tag
+    auto worksize = getWorkspaceSize<distance_type(), Type, Type, Type, IdxT>(x, y, m, n, k);
+    workspace.resize(worksize, stream);  // resize the caller's buffer to the reported size
+    detail::distance<distance_type(), Type, Type, Type, IdxT>(
+      handle, x, y, dist, m, n, k, workspace.data(), worksize, isRowMajor, metric_arg);
+  };
+
+  switch (metric) {
+    case DistanceType::Canberra:
+      dispatch(std::integral_constant<DistanceType, DistanceType::Canberra>{});
+      break;
+    case DistanceType::CorrelationExpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::CorrelationExpanded>{});
+      break;
+    case DistanceType::CosineExpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::CosineExpanded>{});
+      break;
+    case DistanceType::HammingUnexpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::HammingUnexpanded>{});
+      break;
+    case DistanceType::HellingerExpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::HellingerExpanded>{});
+      break;
+    case raft::distance::DistanceType::InnerProduct:
+      dispatch(std::integral_constant<DistanceType, DistanceType::InnerProduct>{});
+      break;
+    case DistanceType::JensenShannon:
+      dispatch(std::integral_constant<DistanceType, DistanceType::JensenShannon>{});
+      break;
+    case DistanceType::KLDivergence:
+      dispatch(std::integral_constant<DistanceType, DistanceType::KLDivergence>{});
+      break;
+    case DistanceType::L1:
+      dispatch(std::integral_constant<DistanceType, DistanceType::L1>{});
+      break;
+    case DistanceType::L2Expanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::L2Expanded>{});
+      break;
+    case DistanceType::L2SqrtExpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::L2SqrtExpanded>{});
+      break;
+    case DistanceType::L2SqrtUnexpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::L2SqrtUnexpanded>{});
+      break;
+    case DistanceType::L2Unexpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::L2Unexpanded>{});
+      break;
+    case DistanceType::Linf:
+      dispatch(std::integral_constant<DistanceType, DistanceType::Linf>{});
+      break;
+    case DistanceType::LpUnexpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::LpUnexpanded>{});
+      break;
+    case DistanceType::RusselRaoExpanded:
+      dispatch(std::integral_constant<DistanceType, DistanceType::RusselRaoExpanded>{});
+      break;
+    default: THROW("Unknown or unsupported distance metric '%d'!", (int)metric);
+  };
+}
+
+/**
+ * @brief Convenience wrapper around 'distance' prim to convert runtime metric
+ * into compile time for the purpose of dispatch (the workspace is allocated internally)
+ * @tparam Type input/accumulation/output data-type
+ * @tparam IdxT indexing type
+ * @param handle raft handle for managing expensive resources
+ * @param x first set of points
+ * @param y second set of points
+ * @param dist output distance matrix
+ * @param m number of points in x
+ * @param n number of points in y
+ * @param k dimensionality
+ * @param metric distance metric
+ * @param isRowMajor whether the matrices are row-major or col-major
+ * @param metric_arg metric argument (used for Minkowski distance)
+ */
+template <typename Type, typename IdxT = int>
+void pairwise_distance(raft::resources const& handle,
+                       const Type* x,
+                       const Type* y,
+                       Type* dist,
+                       IdxT m,
+                       IdxT n,
+                       IdxT k,
+                       raft::distance::DistanceType metric,
+                       bool isRowMajor = true,
+                       Type metric_arg = 2.0f)
+{
+  auto stream = raft::resource::get_cuda_stream(handle);
+  rmm::device_uvector<char> workspace(0, stream);  // starts empty; the overload below resizes it
+  pairwise_distance<Type, IdxT>(
+    handle, x, y, dist, m, n, k, workspace, metric, isRowMajor, metric_arg);
+}
+
+/** @} */
+
+/**
+ * \defgroup distance_mdspan Pairwise distance functions
+ * @{
+ */
+
+/**
+ * @brief Evaluate pairwise distances for the simple use case.
+ *
+ * Note: Only contiguous row- or column-major layouts supported currently.
+ *
+ * Usage example:
+ * @code{.cpp}
+ * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/device_mdarray.hpp>
+ * #include <raft/random/make_blobs.cuh>
+ * #include <raft/distance/distance.cuh>
+ *
+ * raft::device_resources handle;
+ * int n_samples = 5000;
+ * int n_features = 50;
+ *
+ * auto input = raft::make_device_matrix<float>(handle, n_samples, n_features);
+ * auto labels = raft::make_device_vector<int>(handle, n_samples);
+ * auto output = raft::make_device_matrix<float>(handle, n_samples, n_samples);
+ *
+ * raft::random::make_blobs(handle, input.view(), labels.view());
+ * auto metric = raft::distance::DistanceType::L2SqrtExpanded;
+ * raft::distance::pairwise_distance(handle, input.view(), input.view(), output.view(), metric);
+ * @endcode
+ *
+ * @tparam DistanceType which distance to evaluate
+ * @tparam DataT input argument type
+ * @tparam AccT accumulation type
+ * @tparam OutT output type
+ * @tparam IdxT Index type
+ * @param handle raft handle for managing expensive resources
+ * @param x first set of points (size n*k)
+ * @param y second set of points (size m*k)
+ * @param dist output distance matrix (size n*m)
+ * @param metric_arg metric argument (used for Minkowski distance)
+ */
+template <raft::distance::DistanceType DistT,
+          typename DataT,
+          typename AccT,
+          typename OutT,
+          typename layout = raft::layout_c_contiguous,
+          typename IdxT   = int>
+void distance(raft::resources const& handle,
+              raft::device_matrix_view<DataT, IdxT, layout> const x,
+              raft::device_matrix_view<DataT, IdxT, layout> const y,
+              raft::device_matrix_view<OutT, IdxT, layout> dist,
+              DataT metric_arg = 2.0f)
+{
+  RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal.");
+  RAFT_EXPECTS(dist.extent(0) == x.extent(0),
+               "Number of rows in output must be equal to "
+               "number of rows in X");
+  RAFT_EXPECTS(dist.extent(1) == y.extent(0),
+               "Number of columns in output must be equal to "
+               "number of rows in Y");
+
+  RAFT_EXPECTS(x.is_exhaustive(), "Input x must be contiguous.");
+  RAFT_EXPECTS(y.is_exhaustive(), "Input y must be contiguous.");
+
+  constexpr auto is_rowmajor = std::is_same_v<layout, layout_c_contiguous>;
+
+  distance<DistT, DataT, AccT, OutT, IdxT>(handle,
+                                           x.data_handle(),
+                                           y.data_handle(),
+                                           dist.data_handle(),
+                                           x.extent(0),
+                                           y.extent(0),
+                                           x.extent(1),
+                                           is_rowmajor,
+                                           metric_arg);
+}
+
+/**
+ * @brief Convenience wrapper around 'distance' prim to convert runtime metric
+ * into compile time for the purpose of dispatch
+ * @tparam Type input/accumulation/output data-type
+ * @tparam layout layout of all three views (layout_c_contiguous is treated as row-major)
+ * @tparam IdxT indexing type
+ * @param handle raft handle for managing expensive resources
+ * @param x first matrix of points (size mxk)
+ * @param y second matrix of points (size nxk)
+ * @param dist output distance matrix (size mxn)
+ * @param metric distance metric
+ * @param metric_arg metric argument (used for Minkowski distance)
+ */
+template <typename Type, typename layout = layout_c_contiguous, typename IdxT = int>
+void pairwise_distance(raft::resources const& handle,
+                       device_matrix_view<Type, IdxT, layout> const x,
+                       device_matrix_view<Type, IdxT, layout> const y,
+                       device_matrix_view<Type, IdxT, layout> dist,
+                       raft::distance::DistanceType metric,
+                       Type metric_arg = 2.0f)
+{
+  RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal.");
+  RAFT_EXPECTS(dist.extent(0) == x.extent(0),
+               "Number of rows in output must be equal to "
+               "number of rows in X");
+  RAFT_EXPECTS(dist.extent(1) == y.extent(0),
+               "Number of columns in output must be equal to "
+               "number of rows in Y");
+
+  RAFT_EXPECTS(x.is_exhaustive(), "Input x must be contiguous.");
+  RAFT_EXPECTS(y.is_exhaustive(), "Input y must be contiguous.");
+  RAFT_EXPECTS(dist.is_exhaustive(), "Output must be contiguous.");
+
+  constexpr auto rowmajor = std::is_same_v<layout, layout_c_contiguous>;
+
+  // Forward to the pointer-based overload that takes no workspace argument;
+  // that overload obtains the stream and creates its own workspace buffer.
+  pairwise_distance(handle,
+                    x.data_handle(),
+                    y.data_handle(),
+                    dist.data_handle(),
+                    x.extent(0),
+                    y.extent(0),
+                    x.extent(1),
+                    metric,
+                    rowmajor,
+                    metric_arg);
+}
+
+/** @} */
+
+};  // namespace distance
+};  // namespace raft
diff --git a/cpp/include/raft/distance/distance.cuh b/cpp/include/raft/distance/distance.cuh
index 5216902635..de70cd4691 100644
--- a/cpp/include/raft/distance/distance.cuh
+++ b/cpp/include/raft/distance/distance.cuh
@@ -13,470 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef __DISTANCE_H
-#define __DISTANCE_H
-
 #pragma once
 
-#include <raft/core/resource/cuda_stream.hpp>
-#include <raft/core/resources.hpp>
-#include <raft/distance/detail/distance.cuh>
-#include <raft/distance/distance_types.hpp>
-#include <rmm/device_uvector.hpp>
-#include <type_traits>
-
-#include <raft/core/device_mdspan.hpp>
-
-namespace raft {
-namespace distance {
-
-/**
- * @defgroup pairwise_distance pointer-based pairwise distance prims
- * @{
- */
-
-/**
- * @brief Evaluate pairwise distances with the user epilogue lamba allowed
- * @tparam DistanceType which distance to evaluate
- * @tparam InType input argument type
- * @tparam AccType accumulation type
- * @tparam OutType output type
- * @tparam FinalLambda user-defined epilogue lamba
- * @tparam Index_ Index type
- * @param handle raft handle for managing expensive resources
- * @param x first set of points
- * @param y second set of points
- * @param dist output distance matrix
- * @param m number of points in x
- * @param n number of points in y
- * @param k dimensionality
- * @param workspace temporary workspace needed for computations
- * @param worksize number of bytes of the workspace
- * @param fin_op the final gemm epilogue lambda
- * @param isRowMajor whether the matrices are row-major or col-major
- * @param metric_arg metric argument (used for Minkowski distance)
- *
- * @note fin_op: This is a device lambda which is supposed to operate upon the
- * input which is AccType and returns the output in OutType. It's signature is
- * as follows:  <pre>OutType fin_op(AccType in, int g_idx);</pre>. If one needs
- * any other parameters, feel free to pass them via closure.
- */
-template <raft::distance::DistanceType distanceType,
-          typename InType,
-          typename AccType,
-          typename OutType,
-          typename FinalLambda,
-          typename Index_ = int>
-void distance(raft::resources const& handle,
-              const InType* x,
-              const InType* y,
-              OutType* dist,
-              Index_ m,
-              Index_ n,
-              Index_ k,
-              void* workspace,
-              size_t worksize,
-              FinalLambda fin_op,
-              bool isRowMajor   = true,
-              InType metric_arg = 2.0f)
-{
-  detail::distance<distanceType, InType, AccType, OutType, FinalLambda, Index_>(
-    handle, x, y, dist, m, n, k, workspace, worksize, fin_op, isRowMajor, metric_arg);
-}
-
-/**
- * @brief Evaluate pairwise distances for the simple use case
- * @tparam DistanceType which distance to evaluate
- * @tparam InType input argument type
- * @tparam AccType accumulation type
- * @tparam OutType output type
- * @tparam Index_ Index type
- * @param handle raft handle for managing expensive resources
- * @param x first set of points
- * @param y second set of points
- * @param dist output distance matrix
- * @param m number of points in x
- * @param n number of points in y
- * @param k dimensionality
- * @param workspace temporary workspace needed for computations
- * @param worksize number of bytes of the workspace
- * @param isRowMajor whether the matrices are row-major or col-major
- * @param metric_arg metric argument (used for Minkowski distance)
- */
-template <raft::distance::DistanceType distanceType,
-          typename InType,
-          typename AccType,
-          typename OutType,
-          typename Index_ = int>
-void distance(raft::resources const& handle,
-              const InType* x,
-              const InType* y,
-              OutType* dist,
-              Index_ m,
-              Index_ n,
-              Index_ k,
-              void* workspace,
-              size_t worksize,
-              bool isRowMajor   = true,
-              InType metric_arg = 2.0f)
-{
-  detail::distance<distanceType, InType, AccType, OutType, Index_>(
-    handle, x, y, dist, m, n, k, workspace, worksize, isRowMajor, metric_arg);
-}
-
-/**
- * @brief Return the exact workspace size to compute the distance
- * @tparam DistanceType which distance to evaluate
- * @tparam InType input argument type
- * @tparam AccType accumulation type
- * @tparam OutType output type
- * @tparam Index_ Index type
- * @param x first set of points
- * @param y second set of points
- * @param m number of points in x
- * @param n number of points in y
- * @param k dimensionality
- *
- * @note If the specified distanceType doesn't need the workspace at all, it
- * returns 0.
- */
-template <raft::distance::DistanceType distanceType,
-          typename InType,
-          typename AccType,
-          typename OutType,
-          typename Index_ = int>
-size_t getWorkspaceSize(const InType* x, const InType* y, Index_ m, Index_ n, Index_ k)
-{
-  return detail::getWorkspaceSize<distanceType, InType, AccType, OutType, Index_>(x, y, m, n, k);
-}
-
-/**
- * @brief Return the exact workspace size to compute the distance
- * @tparam DistanceType which distance to evaluate
- * @tparam InType input argument type
- * @tparam AccType accumulation type
- * @tparam OutType output type
- * @tparam Index_ Index type
- * @param x first set of points (size m*k)
- * @param y second set of points (size n*k)
- * @return number of bytes needed in workspace
- *
- * @note If the specified distanceType doesn't need the workspace at all, it
- * returns 0.
- */
-template <raft::distance::DistanceType distanceType,
-          typename InType,
-          typename AccType,
-          typename OutType,
-          typename Index_ = int,
-          typename layout>
-size_t getWorkspaceSize(const raft::device_matrix_view<InType, layout> x,
-                        const raft::device_matrix_view<InType, layout> y)
-{
-  RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal.");
-
-  return getWorkspaceSize<distanceType, InType, AccType, OutType, Index_>(
-    x.data(), y.data(), x.extent(0), y.extent(0), x.extent(1));
-}
-
-/**
- * @brief Evaluate pairwise distances for the simple use case
- * @tparam DistanceType which distance to evaluate
- * @tparam InType input argument type
- * @tparam AccType accumulation type
- * @tparam OutType output type
- * @tparam Index_ Index type
- * @param handle raft handle for managing expensive resources
- * @param x first set of points
- * @param y second set of points
- * @param dist output distance matrix
- * @param m number of points in x
- * @param n number of points in y
- * @param k dimensionality
- * @param isRowMajor whether the matrices are row-major or col-major
- * @param metric_arg metric argument (used for Minkowski distance)
- */
-template <raft::distance::DistanceType distanceType,
-          typename InType,
-          typename AccType,
-          typename OutType,
-          typename Index_ = int>
-void distance(raft::resources const& handle,
-              const InType* x,
-              const InType* y,
-              OutType* dist,
-              Index_ m,
-              Index_ n,
-              Index_ k,
-              bool isRowMajor   = true,
-              InType metric_arg = 2.0f)
-{
-  auto stream = raft::resource::get_cuda_stream(handle);
-  rmm::device_uvector<char> workspace(0, stream);
-  auto worksize = getWorkspaceSize<distanceType, InType, AccType, OutType, Index_>(x, y, m, n, k);
-  workspace.resize(worksize, stream);
-  detail::distance<distanceType, InType, AccType, OutType, Index_>(
-    handle, x, y, dist, m, n, k, workspace.data(), worksize, isRowMajor, metric_arg);
-}
-
-/**
- * @brief Convenience wrapper around 'distance' prim to convert runtime metric
- * into compile time for the purpose of dispatch
- * @tparam Type input/accumulation/output data-type
- * @tparam Index_ indexing type
- * @param handle raft handle for managing expensive resources
- * @param x first set of points
- * @param y second set of points
- * @param dist output distance matrix
- * @param m number of points in x
- * @param n number of points in y
- * @param k dimensionality
- * @param workspace temporary workspace buffer which can get resized as per the
- * needed workspace size
- * @param metric distance metric
- * @param isRowMajor whether the matrices are row-major or col-major
- * @param metric_arg metric argument (used for Minkowski distance)
- */
-template <typename Type, typename Index_ = int>
-void pairwise_distance(raft::resources const& handle,
-                       const Type* x,
-                       const Type* y,
-                       Type* dist,
-                       Index_ m,
-                       Index_ n,
-                       Index_ k,
-                       rmm::device_uvector<char>& workspace,
-                       raft::distance::DistanceType metric,
-                       bool isRowMajor = true,
-                       Type metric_arg = 2.0f)
-{
-  cudaStream_t stream = raft::resource::get_cuda_stream(handle);
-
-  auto dispatch = [&](auto distance_type) {
-    auto worksize = getWorkspaceSize<distance_type(), Type, Type, Type, Index_>(x, y, m, n, k);
-    workspace.resize(worksize, stream);
-    detail::distance<distance_type(), Type, Type, Type, Index_>(
-      handle, x, y, dist, m, n, k, workspace.data(), worksize, isRowMajor, metric_arg);
-  };
-
-  switch (metric) {
-    case DistanceType::Canberra:
-      dispatch(std::integral_constant<DistanceType, DistanceType::Canberra>{});
-      break;
-    case DistanceType::CorrelationExpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::CorrelationExpanded>{});
-      break;
-    case DistanceType::CosineExpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::CosineExpanded>{});
-      break;
-    case DistanceType::HammingUnexpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::HammingUnexpanded>{});
-      break;
-    case DistanceType::HellingerExpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::HellingerExpanded>{});
-      break;
-    case raft::distance::DistanceType::InnerProduct:
-      dispatch(std::integral_constant<DistanceType, DistanceType::InnerProduct>{});
-      break;
-    case DistanceType::JensenShannon:
-      dispatch(std::integral_constant<DistanceType, DistanceType::JensenShannon>{});
-      break;
-    case DistanceType::KLDivergence:
-      dispatch(std::integral_constant<DistanceType, DistanceType::KLDivergence>{});
-      break;
-    case DistanceType::L1:
-      dispatch(std::integral_constant<DistanceType, DistanceType::L1>{});
-      break;
-    case DistanceType::L2Expanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::L2Expanded>{});
-      break;
-    case DistanceType::L2SqrtExpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::L2SqrtExpanded>{});
-      break;
-    case DistanceType::L2SqrtUnexpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::L2SqrtUnexpanded>{});
-      break;
-    case DistanceType::L2Unexpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::L2Unexpanded>{});
-      break;
-    case DistanceType::Linf:
-      dispatch(std::integral_constant<DistanceType, DistanceType::Linf>{});
-      break;
-    case DistanceType::LpUnexpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::LpUnexpanded>{});
-      break;
-    case DistanceType::RusselRaoExpanded:
-      dispatch(std::integral_constant<DistanceType, DistanceType::RusselRaoExpanded>{});
-      break;
-    default: THROW("Unknown or unsupported distance metric '%d'!", (int)metric);
-  };
-}
-
-/**
- * @brief Convenience wrapper around 'distance' prim to convert runtime metric
- * into compile time for the purpose of dispatch
- * @tparam Type input/accumulation/output data-type
- * @tparam Index_ indexing type
- * @param handle raft handle for managing expensive resources
- * @param x first set of points
- * @param y second set of points
- * @param dist output distance matrix
- * @param m number of points in x
- * @param n number of points in y
- * @param k dimensionality
- * @param metric distance metric
- * @param isRowMajor whether the matrices are row-major or col-major
- * @param metric_arg metric argument (used for Minkowski distance)
- */
-template <typename Type, typename Index_ = int>
-void pairwise_distance(raft::resources const& handle,
-                       const Type* x,
-                       const Type* y,
-                       Type* dist,
-                       Index_ m,
-                       Index_ n,
-                       Index_ k,
-                       raft::distance::DistanceType metric,
-                       bool isRowMajor = true,
-                       Type metric_arg = 2.0f)
-{
-  auto stream = raft::resource::get_cuda_stream(handle);
-  rmm::device_uvector<char> workspace(0, stream);
-  pairwise_distance<Type, Index_>(
-    handle, x, y, dist, m, n, k, workspace, metric, isRowMajor, metric_arg);
-}
-
-/** @} */
-
-/**
- * \defgroup distance_mdspan Pairwise distance functions
- * @{
- */
-
-/**
- * @brief Evaluate pairwise distances for the simple use case.
- *
- * Note: Only contiguous row- or column-major layouts supported currently.
- *
- * Usage example:
- * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
- * #include <raft/core/device_mdarray.hpp>
- * #include <raft/random/make_blobs.cuh>
- * #include <raft/distance/distance.cuh>
- *
- * raft::raft::device_resources handle;
- * int n_samples = 5000;
- * int n_features = 50;
- *
- * auto input = raft::make_device_matrix<float>(handle, n_samples, n_features);
- * auto labels = raft::make_device_vector<int>(handle, n_samples);
- * auto output = raft::make_device_matrix<float>(handle, n_samples, n_samples);
- *
- * raft::random::make_blobs(handle, input.view(), labels.view());
- * auto metric = raft::distance::DistanceType::L2SqrtExpanded;
- * raft::distance::pairwise_distance(handle, input.view(), input.view(), output.view(), metric);
- * @endcode
- *
- * @tparam DistanceType which distance to evaluate
- * @tparam InType input argument type
- * @tparam AccType accumulation type
- * @tparam OutType output type
- * @tparam Index_ Index type
- * @param handle raft handle for managing expensive resources
- * @param x first set of points (size n*k)
- * @param y second set of points (size m*k)
- * @param dist output distance matrix (size n*m)
- * @param metric_arg metric argument (used for Minkowski distance)
- */
-template <raft::distance::DistanceType distanceType,
-          typename InType,
-          typename AccType,
-          typename OutType,
-          typename layout = raft::layout_c_contiguous,
-          typename Index_ = int>
-void distance(raft::resources const& handle,
-              raft::device_matrix_view<InType, Index_, layout> const x,
-              raft::device_matrix_view<InType, Index_, layout> const y,
-              raft::device_matrix_view<OutType, Index_, layout> dist,
-              InType metric_arg = 2.0f)
-{
-  RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal.");
-  RAFT_EXPECTS(dist.extent(0) == x.extent(0),
-               "Number of rows in output must be equal to "
-               "number of rows in X");
-  RAFT_EXPECTS(dist.extent(1) == y.extent(0),
-               "Number of columns in output must be equal to "
-               "number of rows in Y");
-
-  RAFT_EXPECTS(x.is_exhaustive(), "Input x must be contiguous.");
-  RAFT_EXPECTS(y.is_exhaustive(), "Input y must be contiguous.");
-
-  constexpr auto is_rowmajor = std::is_same_v<layout, layout_c_contiguous>;
-
-  distance<distanceType, InType, AccType, OutType, Index_>(handle,
-                                                           x.data_handle(),
-                                                           y.data_handle(),
-                                                           dist.data_handle(),
-                                                           x.extent(0),
-                                                           y.extent(0),
-                                                           x.extent(1),
-                                                           is_rowmajor,
-                                                           metric_arg);
-}
-
-/**
- * @brief Convenience wrapper around 'distance' prim to convert runtime metric
- * into compile time for the purpose of dispatch
- * @tparam Type input/accumulation/output data-type
- * @tparam Index_ indexing type
- * @param handle raft handle for managing expensive resources
- * @param x first matrix of points (size mxk)
- * @param y second matrix of points (size nxk)
- * @param dist output distance matrix (size mxn)
- * @param metric distance metric
- * @param metric_arg metric argument (used for Minkowski distance)
- */
-template <typename Type, typename layout = layout_c_contiguous, typename Index_ = int>
-void pairwise_distance(raft::resources const& handle,
-                       device_matrix_view<Type, Index_, layout> const x,
-                       device_matrix_view<Type, Index_, layout> const y,
-                       device_matrix_view<Type, Index_, layout> dist,
-                       raft::distance::DistanceType metric,
-                       Type metric_arg = 2.0f)
-{
-  RAFT_EXPECTS(x.extent(1) == y.extent(1), "Number of columns must be equal.");
-  RAFT_EXPECTS(dist.extent(0) == x.extent(0),
-               "Number of rows in output must be equal to "
-               "number of rows in X");
-  RAFT_EXPECTS(dist.extent(1) == y.extent(0),
-               "Number of columns in output must be equal to "
-               "number of rows in Y");
-
-  RAFT_EXPECTS(x.is_exhaustive(), "Input x must be contiguous.");
-  RAFT_EXPECTS(y.is_exhaustive(), "Input y must be contiguous.");
-  RAFT_EXPECTS(dist.is_exhaustive(), "Output must be contiguous.");
-
-  constexpr auto rowmajor = std::is_same_v<layout, layout_c_contiguous>;
-
-  auto stream = raft::resource::get_cuda_stream(handle);
-  rmm::device_uvector<char> workspace(0, stream);
-
-  pairwise_distance(handle,
-                    x.data_handle(),
-                    y.data_handle(),
-                    dist.data_handle(),
-                    x.extent(0),
-                    y.extent(0),
-                    x.extent(1),
-                    metric,
-                    rowmajor,
-                    metric_arg);
-}
-
-/** @} */
-
-};  // namespace distance
-};  // namespace raft
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "distance-inl.cuh"
+#endif
 
+#ifdef RAFT_COMPILED
+#include "distance-ext.cuh"
 #endif
diff --git a/cpp/include/raft/distance/fused_l2_nn-ext.cuh b/cpp/include/raft/distance/fused_l2_nn-ext.cuh
new file mode 100644
index 0000000000..05732c1f3f
--- /dev/null
+++ b/cpp/include/raft/distance/fused_l2_nn-ext.cuh
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>                                // int64_t
+#include <raft/core/device_resources.hpp>         // raft::device_resources
+#include <raft/core/kvp.hpp>                      // raft::KeyValuePair
+#include <raft/distance/fused_l2_nn_helpers.cuh>  // include initialize and reduce operations
+#include <raft/util/raft_explicit.hpp>            // RAFT_EXPLICIT
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft {
+namespace distance {
+// Visible only under RAFT_EXPLICIT_INSTANTIATE_ONLY; RAFT_EXPLICIT comes from raft_explicit.hpp.
+template <typename DataT, typename OutT, typename IdxT>
+void fusedL2NNMinReduce(OutT* min,
+                        const DataT* x,
+                        const DataT* y,
+                        const DataT* xn,
+                        const DataT* yn,
+                        IdxT m,
+                        IdxT n,
+                        IdxT k,
+                        void* workspace,
+                        bool sqrt,
+                        bool initOutBuffer,
+                        cudaStream_t stream) RAFT_EXPLICIT;
+
+}  // namespace distance
+}  // namespace raft
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_distance_fusedL2NNMinReduce(DataT, OutT, IdxT)                          \
+  extern template void raft::distance::fusedL2NNMinReduce<DataT, OutT, IdxT>(OutT * min,         \
+                                                                             const DataT* x,     \
+                                                                             const DataT* y,     \
+                                                                             const DataT* xn,    \
+                                                                             const DataT* yn,    \
+                                                                             IdxT m,             \
+                                                                             IdxT n,             \
+                                                                             IdxT k,             \
+                                                                             void* workspace,    \
+                                                                             bool sqrt,          \
+                                                                             bool initOutBuffer, \
+                                                                             cudaStream_t stream)
+// Each use below is an explicit instantiation declaration (`extern template`), not a definition.
+instantiate_raft_distance_fusedL2NNMinReduce(double, double, int);
+instantiate_raft_distance_fusedL2NNMinReduce(double, double, int64_t);
+instantiate_raft_distance_fusedL2NNMinReduce(float, float, int);
+instantiate_raft_distance_fusedL2NNMinReduce(float, float, int64_t);
+
+// A bare comma in a macro argument would split the argument, so we use the COMMA macro:
+#define COMMA ,
+
+instantiate_raft_distance_fusedL2NNMinReduce(double, raft::KeyValuePair<int COMMA double>, int);
+instantiate_raft_distance_fusedL2NNMinReduce(double,
+                                             raft::KeyValuePair<int64_t COMMA double>,
+                                             int64_t);
+instantiate_raft_distance_fusedL2NNMinReduce(float, raft::KeyValuePair<int COMMA float>, int);
+instantiate_raft_distance_fusedL2NNMinReduce(float,
+                                             raft::KeyValuePair<int64_t COMMA float>,
+                                             int64_t);
+
+#undef COMMA
+
+#undef instantiate_raft_distance_fusedL2NNMinReduce
diff --git a/cpp/include/raft/distance/fused_l2_nn-inl.cuh b/cpp/include/raft/distance/fused_l2_nn-inl.cuh
new file mode 100644
index 0000000000..698d287f87
--- /dev/null
+++ b/cpp/include/raft/distance/fused_l2_nn-inl.cuh
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FUSED_L2_NN_H
+#define __FUSED_L2_NN_H
+
+#pragma once
+
+#include <cub/cub.cuh>
+#include <limits>
+#include <raft/core/device_resources.hpp>
+#include <raft/distance/detail/fused_l2_nn.cuh>
+#include <raft/distance/fused_l2_nn_helpers.cuh>
+#include <raft/linalg/contractions.cuh>
+#include <raft/util/cuda_utils.cuh>
+#include <stdint.h>
+#include <type_traits>
+
+namespace raft {
+namespace distance {
+
+/**
+ * \ingroup fused_l2_nn
+ * @{
+ */
+/**
+ * @brief Fused L2 distance and 1-nearest-neighbor computation in a single call.
+ *
+ * The benefits of such a call are 2-fold: 1) eliminate the need for an
+ * intermediate buffer to store the output of gemm 2) reduce the memory read
+ * traffic on this intermediate buffer, otherwise needed during the reduction
+ * phase for 1-NN.
+ *
+ * @tparam DataT     data type
+ * @tparam OutT      output type to either store 1-NN indices and their minimum
+ *                   distances or store only the min distances. Accordingly, one
+ *                   has to pass an appropriate `ReduceOpT`
+ * @tparam IdxT      indexing arithmetic type
+ * @tparam ReduceOpT A struct to perform the final needed reduction operation
+ *                   and also to initialize the output array elements with the
+ *                   appropriate initial value needed for reduction.
+ *
+ * @param[out] min           will contain the reduced output (Length = `m`)
+ *                           (on device)
+ * @param[in]  x             first matrix. Row major. Dim = `m x k`.
+ *                           (on device).
+ * @param[in]  y             second matrix. Row major. Dim = `n x k`.
+ *                           (on device).
+ * @param[in]  xn            L2 squared norm of `x`. Length = `m`. (on device).
+ * @param[in]  yn            L2 squared norm of `y`. Length = `n`. (on device)
+ * @param[in]  m             gemm m
+ * @param[in]  n             gemm n
+ * @param[in]  k             gemm k
+ * @param[in]  workspace     temp workspace. Size = sizeof(int)*m. (on device)
+ * @param[in]  redOp         reduction operator in the epilogue
+ * @param[in]  pairRedOp     reduction operation on key value pairs
+ * @param[in]  sqrt          Whether the output `min` should contain L2-sqrt
+ * @param[in]  initOutBuffer whether to initialize the output buffer before the
+ *                           main kernel launch
+ * @param[in]  stream        cuda stream
+ */
+template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT, typename KVPReduceOpT>
+void fusedL2NN(OutT* min,
+               const DataT* x,
+               const DataT* y,
+               const DataT* xn,
+               const DataT* yn,
+               IdxT m,
+               IdxT n,
+               IdxT k,
+               void* workspace,
+               ReduceOpT redOp,
+               KVPReduceOpT pairRedOp,
+               bool sqrt,
+               bool initOutBuffer,
+               cudaStream_t stream)
+{
+  // When k is smaller than 32, the Policy4x4 results in redundant calculations
+  // as it uses tiles that have k=32. Therefore, use a "skinny" policy instead
+  // that uses tiles with a smaller value of k.
+  bool is_skinny = k < 32;
+
+  size_t bytes = sizeof(DataT) * k;  // byte size of k elements (one row of x or y)
+  auto px      = reinterpret_cast<uintptr_t>(x);  // address of x, for the alignment tests
+  auto py      = reinterpret_cast<uintptr_t>(y);  // address of y, for the alignment tests
+  if (16 % sizeof(DataT) == 0 && bytes % 16 == 0 && px % 16 == 0 && py % 16 == 0) {
+    // Row size and both base addresses are multiples of 16 bytes.
+    if (is_skinny) {
+      detail::fusedL2NNImpl<DataT,
+                            OutT,
+                            IdxT,
+                            typename linalg::Policy4x4Skinny<DataT, 16 / sizeof(DataT)>::Policy,
+                            ReduceOpT>(
+        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
+    } else {
+      detail::fusedL2NNImpl<DataT,
+                            OutT,
+                            IdxT,
+                            typename linalg::Policy4x4<DataT, 16 / sizeof(DataT)>::Policy,
+                            ReduceOpT>(
+        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
+    }
+  } else if (8 % sizeof(DataT) == 0 && bytes % 8 == 0 && px % 8 == 0 && py % 8 == 0) {
+    // Row size and both base addresses are multiples of 8 bytes (but failed the 16-byte test).
+    if (is_skinny) {
+      detail::fusedL2NNImpl<DataT,
+                            OutT,
+                            IdxT,
+                            typename linalg::Policy4x4Skinny<DataT, 8 / sizeof(DataT)>::Policy,
+                            ReduceOpT>(
+        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
+    } else {
+      detail::fusedL2NNImpl<DataT,
+                            OutT,
+                            IdxT,
+                            typename linalg::Policy4x4<DataT, 8 / sizeof(DataT)>::Policy,
+                            ReduceOpT>(
+        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
+    }
+  } else {
+    // Neither alignment test passed: fall back to a policy parameter of 1.
+    if (is_skinny) {
+      detail::fusedL2NNImpl<DataT,
+                            OutT,
+                            IdxT,
+                            typename linalg::Policy4x4Skinny<DataT, 1>::Policy,
+                            ReduceOpT>(
+        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
+    } else {
+      detail::fusedL2NNImpl<DataT,
+                            OutT,
+                            IdxT,
+                            typename linalg::Policy4x4<DataT, 1>::Policy,
+                            ReduceOpT>(
+        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
+    }
+  }
+}
+
+/**
+ * @brief Wrapper around fusedL2NN with minimum reduction operators.
+ *
+ * fusedL2NN cannot be compiled in the distance library due to the lambda
+ * operators, so this wrapper covers the most common case (minimum).
+ * This should be preferred to the more generic API when possible, in order to
+ * reduce compilation times for users of the shared library.
+ *
+ * @tparam DataT     data type
+ * @tparam OutT      output type to either store 1-NN indices and their minimum
+ *                   distances (e.g. raft::KeyValuePair<int, float>) or store only the
+ *                   min distances.
+ * @tparam IdxT      indexing arithmetic type
+ * @param[out] min           will contain the reduced output (Length = `m`)
+ *                           (on device)
+ * @param[in]  x             first matrix. Row major. Dim = `m x k`.
+ *                           (on device).
+ * @param[in]  y             second matrix. Row major. Dim = `n x k`.
+ *                           (on device).
+ * @param[in]  xn            L2 squared norm of `x`. Length = `m`. (on device).
+ * @param[in]  yn            L2 squared norm of `y`. Length = `n`. (on device)
+ * @param[in]  m             gemm m
+ * @param[in]  n             gemm n
+ * @param[in]  k             gemm k
+ * @param[in]  workspace     temp workspace. Size = sizeof(int)*m. (on device)
+ * @param[in]  sqrt          Whether the output `min` should contain L2-sqrt
+ * @param[in]  initOutBuffer whether to initialize the output buffer before the
+ *                           main kernel launch
+ * @param[in]  stream        cuda stream
+ */
+template <typename DataT, typename OutT, typename IdxT>
+void fusedL2NNMinReduce(OutT* min,
+                        const DataT* x,
+                        const DataT* y,
+                        const DataT* xn,
+                        const DataT* yn,
+                        IdxT m,
+                        IdxT n,
+                        IdxT k,
+                        void* workspace,
+                        bool sqrt,
+                        bool initOutBuffer,
+                        cudaStream_t stream)
+{
+  MinAndDistanceReduceOp<IdxT, DataT> redOp;  // default-constructed; becomes fusedL2NN's redOp
+  KVPMinReduce<IdxT, DataT> pairRedOp;  // default-constructed; becomes fusedL2NN's pairRedOp
+
+  fusedL2NN<DataT, OutT, IdxT>(  // ReduceOpT / KVPReduceOpT are deduced from the arguments
+    min, x, y, xn, yn, m, n, k, workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
+}
+
+/** @} */
+
+}  // namespace distance
+}  // namespace raft
+
+#endif
diff --git a/cpp/include/raft/distance/fused_l2_nn.cuh b/cpp/include/raft/distance/fused_l2_nn.cuh
index e832bcb020..b1a3551323 100644
--- a/cpp/include/raft/distance/fused_l2_nn.cuh
+++ b/cpp/include/raft/distance/fused_l2_nn.cuh
@@ -13,218 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#ifndef __FUSED_L2_NN_H
-#define __FUSED_L2_NN_H
-
 #pragma once
 
-#include <cub/cub.cuh>
-#include <limits>
-#include <raft/core/device_resources.hpp>
-#include <raft/distance/detail/fused_l2_nn.cuh>
-#include <raft/linalg/contractions.cuh>
-#include <raft/util/cuda_utils.cuh>
-#include <stdint.h>
-#include <type_traits>
-
-namespace raft {
-namespace distance {
-/**
- * \defgroup fused_l2_nn Fused 1-nearest neighbors
- * @{
- */
-
-template <typename LabelT, typename DataT>
-using KVPMinReduce = detail::KVPMinReduceImpl<LabelT, DataT>;
-
-template <typename LabelT, typename DataT>
-using MinAndDistanceReduceOp = detail::MinAndDistanceReduceOpImpl<LabelT, DataT>;
-
-template <typename LabelT, typename DataT>
-using MinReduceOp = detail::MinReduceOpImpl<LabelT, DataT>;
-
-/** @} */
-
-/**
- * Initialize array using init value from reduction op
- */
-template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT>
-void initialize(
-  raft::device_resources const& handle, OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp)
-{
-  detail::initialize<DataT, OutT, IdxT, ReduceOpT>(min, m, maxVal, redOp, handle.get_stream());
-}
-
-/**
- * \ingroup fused_l2_nn
- * @{
- */
-/**
- * @brief Fused L2 distance and 1-nearest-neighbor computation in a single call.
- *
- * The benefits of such a call are 2-fold: 1) eliminate the need for an
- * intermediate buffer to store the output of gemm 2) reduce the memory read
- * traffic on this intermediate buffer, otherwise needed during the reduction
- * phase for 1-NN.
- *
- * @tparam DataT     data type
- * @tparam OutT      output type to either store 1-NN indices and their minimum
- *                   distances or store only the min distances. Accordingly, one
- *                   has to pass an appropriate `ReduceOpT`
- * @tparam IdxT      indexing arithmetic type
- * @tparam ReduceOpT A struct to perform the final needed reduction operation
- *                   and also to initialize the output array elements with the
- *                   appropriate initial value needed for reduction.
- *
- * @param[out] min           will contain the reduced output (Length = `m`)
- *                           (on device)
- * @param[in]  x             first matrix. Row major. Dim = `m x k`.
- *                           (on device).
- * @param[in]  y             second matrix. Row major. Dim = `n x k`.
- *                           (on device).
- * @param[in]  xn            L2 squared norm of `x`. Length = `m`. (on device).
- * @param[in]  yn            L2 squared norm of `y`. Length = `n`. (on device)
- * @param[in]  m             gemm m
- * @param[in]  n             gemm n
- * @param[in]  k             gemm k
- * @param[in]  workspace     temp workspace. Size = sizeof(int)*m. (on device)
- * @param[in]  redOp         reduction operator in the epilogue
- * @param[in] pairRedOp reduction operation on key value pairs
- * @param[in]  sqrt          Whether the output `minDist` should contain L2-sqrt
- * @param[in]  initOutBuffer whether to initialize the output buffer before the
- *                           main kernel launch
- * @param[in]  stream        cuda stream
- */
-template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT, typename KVPReduceOpT>
-void fusedL2NN(OutT* min,
-               const DataT* x,
-               const DataT* y,
-               const DataT* xn,
-               const DataT* yn,
-               IdxT m,
-               IdxT n,
-               IdxT k,
-               void* workspace,
-               ReduceOpT redOp,
-               KVPReduceOpT pairRedOp,
-               bool sqrt,
-               bool initOutBuffer,
-               cudaStream_t stream)
-{
-  // When k is smaller than 32, the Policy4x4 results in redundant calculations
-  // as it uses tiles that have k=32. Therefore, use a "skinny" policy instead
-  // that uses tiles with a smaller value of k.
-  bool is_skinny = k < 32;
-
-  size_t bytes = sizeof(DataT) * k;
-  auto px      = reinterpret_cast<uintptr_t>(x);
-  auto py      = reinterpret_cast<uintptr_t>(y);
-  if (16 % sizeof(DataT) == 0 && bytes % 16 == 0 && px % 16 == 0 && py % 16 == 0) {
-    if (is_skinny) {
-      detail::fusedL2NNImpl<DataT,
-                            OutT,
-                            IdxT,
-                            typename linalg::Policy4x4Skinny<DataT, 16 / sizeof(DataT)>::Policy,
-                            ReduceOpT>(
-        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
-    } else {
-      detail::fusedL2NNImpl<DataT,
-                            OutT,
-                            IdxT,
-                            typename linalg::Policy4x4<DataT, 16 / sizeof(DataT)>::Policy,
-                            ReduceOpT>(
-        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
-    }
-  } else if (8 % sizeof(DataT) == 0 && bytes % 8 == 0 && px % 8 == 0 && py % 8 == 0) {
-    if (is_skinny) {
-      detail::fusedL2NNImpl<DataT,
-                            OutT,
-                            IdxT,
-                            typename linalg::Policy4x4Skinny<DataT, 8 / sizeof(DataT)>::Policy,
-                            ReduceOpT>(
-        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
-    } else {
-      detail::fusedL2NNImpl<DataT,
-                            OutT,
-                            IdxT,
-                            typename linalg::Policy4x4<DataT, 8 / sizeof(DataT)>::Policy,
-                            ReduceOpT>(
-        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
-    }
-  } else {
-    if (is_skinny) {
-      detail::fusedL2NNImpl<DataT,
-                            OutT,
-                            IdxT,
-                            typename linalg::Policy4x4Skinny<DataT, 1>::Policy,
-                            ReduceOpT>(
-        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
-    } else {
-      detail::fusedL2NNImpl<DataT,
-                            OutT,
-                            IdxT,
-                            typename linalg::Policy4x4<DataT, 1>::Policy,
-                            ReduceOpT>(
-        min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
-    }
-  }
-}
-
-/**
- * @brief Wrapper around fusedL2NN with minimum reduction operators.
- *
- * fusedL2NN cannot be compiled in the distance library due to the lambda
- * operators, so this wrapper covers the most common case (minimum).
- * This should be preferred to the more generic API when possible, in order to
- * reduce compilation times for users of the shared library.
- *
- * @tparam DataT     data type
- * @tparam OutT      output type to either store 1-NN indices and their minimum
- *                   distances (e.g. raft::KeyValuePair<int, float>) or store only the min
- * distances.
- * @tparam IdxT      indexing arithmetic type
- * @param[out] min           will contain the reduced output (Length = `m`)
- *                           (on device)
- * @param[in]  x             first matrix. Row major. Dim = `m x k`.
- *                           (on device).
- * @param[in]  y             second matrix. Row major. Dim = `n x k`.
- *                           (on device).
- * @param[in]  xn            L2 squared norm of `x`. Length = `m`. (on device).
- * @param[in]  yn            L2 squared norm of `y`. Length = `n`. (on device)
- * @param[in]  m             gemm m
- * @param[in]  n             gemm n
- * @param[in]  k             gemm k
- * @param[in]  workspace     temp workspace. Size = sizeof(int)*m. (on device)
- * @param[in]  sqrt          Whether the output `minDist` should contain L2-sqrt
- * @param[in]  initOutBuffer whether to initialize the output buffer before the
- *                           main kernel launch
- * @param[in]  stream        cuda stream
- */
-template <typename DataT, typename OutT, typename IdxT>
-void fusedL2NNMinReduce(OutT* min,
-                        const DataT* x,
-                        const DataT* y,
-                        const DataT* xn,
-                        const DataT* yn,
-                        IdxT m,
-                        IdxT n,
-                        IdxT k,
-                        void* workspace,
-                        bool sqrt,
-                        bool initOutBuffer,
-                        cudaStream_t stream)
-{
-  MinAndDistanceReduceOp<IdxT, DataT> redOp;
-  KVPMinReduce<IdxT, DataT> pairRedOp;
-
-  fusedL2NN<DataT, OutT, IdxT>(
-    min, x, y, xn, yn, m, n, k, workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream);
-}
-
-/** @} */
-
-}  // namespace distance
-}  // namespace raft
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "fused_l2_nn-inl.cuh"
+#endif
 
+#ifdef RAFT_COMPILED
+#include "fused_l2_nn-ext.cuh"
 #endif
diff --git a/cpp/include/raft/distance/fused_l2_nn_helpers.cuh b/cpp/include/raft/distance/fused_l2_nn_helpers.cuh
new file mode 100644
index 0000000000..1bcd7d8dba
--- /dev/null
+++ b/cpp/include/raft/distance/fused_l2_nn_helpers.cuh
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/distance/detail/fused_l2_nn.cuh>
+
+namespace raft::distance {
+
+/**
+ * \defgroup fused_l2_nn Fused 1-nearest neighbors
+ * @{
+ */
+
+template <typename LabelT, typename DataT>
+using KVPMinReduce = detail::KVPMinReduceImpl<LabelT, DataT>;
+
+template <typename LabelT, typename DataT>
+using MinAndDistanceReduceOp = detail::MinAndDistanceReduceOpImpl<LabelT, DataT>;
+
+template <typename LabelT, typename DataT>
+using MinReduceOp = detail::MinReduceOpImpl<LabelT, DataT>;
+
+/** @} */
+
+/**
+ * Initialize array `min` (sized by `m`) using init value from reduction op `redOp` and `maxVal`
+ */
+template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT>
+void initialize(
+  raft::device_resources const& handle, OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp)
+{
+  // Thin forwarder: the detail implementation is handed the stream obtained from `handle`.
+  detail::initialize<DataT, OutT, IdxT, ReduceOpT>(min, m, maxVal, redOp, handle.get_stream());
+}
+
+}  // namespace raft::distance
diff --git a/cpp/include/raft/distance/specializations.cuh b/cpp/include/raft/distance/specializations.cuh
index 5944534be7..ed0b6848ae 100644
--- a/cpp/include/raft/distance/specializations.cuh
+++ b/cpp/include/raft/distance/specializations.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,12 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#ifndef __DISTANCE_SPECIALIZATIONS_H
-#define __DISTANCE_SPECIALIZATIONS_H
-
 #pragma once
 
-#include <raft/distance/specializations/distance.cuh>
-
-#endif
\ No newline at end of file
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/distance/specializations/detail/00_write_template.py b/cpp/include/raft/distance/specializations/detail/00_write_template.py
deleted file mode 100644
index 63ae6580b4..0000000000
--- a/cpp/include/raft/distance/specializations/detail/00_write_template.py
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/env python3
-
-# This template manages all files in this directory, apart from
-# inner_product.cuh and kernels.cuh.
-
-
-# NOTE: this template is not perfectly formatted. Use pre-commit to get
-# everything in shape again.
-start_template = """/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-"""
-
-extern_template = """
-extern template void pairwise_matrix_instantiation_point<OpT,
-                                                         IdxT,
-                                                         DataT,
-                                                         OutT,
-                                                         FinopT>(
-  OpT,
-  pairwise_matrix_params<IdxT, DataT, OutT, FinopT>,
-  cudaStream_t);
-"""
-
-end_template = """}  // namespace raft::distance::detail
-"""
-
-data_type_instances = [
-    dict(
-        DataT="float",
-        AccT="float",
-        OutT="float",
-        IdxT="int",
-    ),
-    dict(
-        DataT="double",
-        AccT="double",
-        OutT="double",
-        IdxT="int",
-    ),
-]
-
-
-
-
-op_instances = [
-    dict(
-        path_prefix="canberra",
-        OpT="ops::canberra_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="correlation",
-        OpT="ops::correlation_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="cosine",
-        OpT="ops::cosine_distance_op<DataT, AccT, IdxT>",
-        # cosine uses CUTLASS for SM80+
-    ),
-    dict(
-        path_prefix="hamming_unexpanded",
-        OpT="ops::hamming_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="hellinger_expanded",
-        OpT="ops::hellinger_distance_op<DataT, AccT, IdxT>",
-    ),
-    # inner product is handled by cublas.
-    dict(
-        path_prefix="jensen_shannon",
-        OpT="ops::jensen_shannon_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="kl_divergence",
-        OpT="ops::kl_divergence_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="l1",
-        OpT="ops::l1_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="l2_expanded",
-        OpT="ops::l2_exp_distance_op<DataT, AccT, IdxT>",
-        # L2 expanded uses CUTLASS for SM80+
-    ),
-    dict(
-        path_prefix="l2_unexpanded",
-        OpT="ops::l2_unexp_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="l_inf",
-        OpT="ops::l_inf_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="lp_unexpanded",
-        OpT="ops::lp_unexp_distance_op<DataT, AccT, IdxT>",
-    ),
-    dict(
-        path_prefix="russel_rao",
-        OpT="ops::russel_rao_distance_op<DataT, AccT, IdxT>",
-    ),
-]
-
-def fill_in(s, template):
-    for k, v in template.items():
-        s = s.replace(k, v)
-    return s
-
-for op_instance in op_instances:
-    path = fill_in("path_prefix.cuh", op_instance)
-    with open(path, "w") as f:
-        f.write(start_template)
-
-        for data_type_instance in data_type_instances:
-            op_data_instance = {
-                k : fill_in(v, data_type_instance)
-                for k, v in op_instance.items()
-            }
-            instance = {
-                **op_data_instance,
-                **data_type_instance,
-                "FinopT": "raft::identity_op",
-            }
-
-            text = fill_in(extern_template, instance)
-
-            f.write(text)
-
-        f.write(end_template)
diff --git a/cpp/include/raft/distance/specializations/detail/canberra.cuh b/cpp/include/raft/distance/specializations/detail/canberra.cuh
deleted file mode 100644
index 276c85e5f6..0000000000
--- a/cpp/include/raft/distance/specializations/detail/canberra.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::canberra_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::canberra_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::canberra_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::canberra_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/correlation.cuh b/cpp/include/raft/distance/specializations/detail/correlation.cuh
deleted file mode 100644
index f019f678df..0000000000
--- a/cpp/include/raft/distance/specializations/detail/correlation.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::correlation_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::correlation_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::correlation_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::correlation_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/cosine.cuh b/cpp/include/raft/distance/specializations/detail/cosine.cuh
deleted file mode 100644
index dcde4ec286..0000000000
--- a/cpp/include/raft/distance/specializations/detail/cosine.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<ops::cosine_distance_op<float, float, int>,
-                                                         int,
-                                                         float,
-                                                         float,
-                                                         raft::identity_op>(
-  ops::cosine_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, raft::identity_op>,
-  cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::cosine_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::cosine_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/hamming_unexpanded.cuh b/cpp/include/raft/distance/specializations/detail/hamming_unexpanded.cuh
deleted file mode 100644
index 1d6964fbce..0000000000
--- a/cpp/include/raft/distance/specializations/detail/hamming_unexpanded.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::hamming_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::hamming_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::hamming_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::hamming_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/hellinger_expanded.cuh b/cpp/include/raft/distance/specializations/detail/hellinger_expanded.cuh
deleted file mode 100644
index f96a06f919..0000000000
--- a/cpp/include/raft/distance/specializations/detail/hellinger_expanded.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::hellinger_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::hellinger_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::hellinger_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::hellinger_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/inner_product.cuh b/cpp/include/raft/distance/specializations/detail/inner_product.cuh
deleted file mode 100644
index d97d678928..0000000000
--- a/cpp/include/raft/distance/specializations/detail/inner_product.cuh
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft {
-namespace distance {
-namespace detail {
-extern template void distance<raft::distance::DistanceType::InnerProduct, float, float, float, int>(
-  raft::resources const& handle,
-  const float* x,
-  const float* y,
-  float* dist,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  size_t worksize,
-  bool isRowMajor,
-  float metric_arg);
-
-extern template void
-distance<raft::distance::DistanceType::InnerProduct, double, double, double, int>(
-  raft::resources const& handle,
-  const double* x,
-  const double* y,
-  double* dist,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  size_t worksize,
-  bool isRowMajor,
-  double metric_arg);
-}  // namespace detail
-}  // namespace distance
-}  // namespace raft
diff --git a/cpp/include/raft/distance/specializations/detail/jensen_shannon.cuh b/cpp/include/raft/distance/specializations/detail/jensen_shannon.cuh
deleted file mode 100644
index 0b58646582..0000000000
--- a/cpp/include/raft/distance/specializations/detail/jensen_shannon.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::jensen_shannon_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::jensen_shannon_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::jensen_shannon_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::jensen_shannon_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/kernels.cuh b/cpp/include/raft/distance/specializations/detail/kernels.cuh
deleted file mode 100644
index 75c9c023e8..0000000000
--- a/cpp/include/raft/distance/specializations/detail/kernels.cuh
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/distance/detail/kernels/gram_matrix.cuh>
-#include <raft/distance/detail/kernels/kernel_matrices.cuh>
-
-extern template class raft::distance::kernels::detail::GramMatrixBase<double>;
-extern template class raft::distance::kernels::detail::GramMatrixBase<float>;
-
-extern template class raft::distance::kernels::detail::PolynomialKernel<double, int>;
-extern template class raft::distance::kernels::detail::PolynomialKernel<float, int>;
-
-extern template class raft::distance::kernels::detail::TanhKernel<double>;
-extern template class raft::distance::kernels::detail::TanhKernel<float>;
-
-// These are somehow missing a kernel definition which is causing a compile error
-// extern template class raft::distance::kernels::detail::RBFKernel<double>;
-// extern template class raft::distance::kernels::detail::RBFKernel<float>;
\ No newline at end of file
diff --git a/cpp/include/raft/distance/specializations/detail/kl_divergence.cuh b/cpp/include/raft/distance/specializations/detail/kl_divergence.cuh
deleted file mode 100644
index 5c164e0fd4..0000000000
--- a/cpp/include/raft/distance/specializations/detail/kl_divergence.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<ops::kl_divergence_op<float, float, int>,
-                                                         int,
-                                                         float,
-                                                         float,
-                                                         raft::identity_op>(
-  ops::kl_divergence_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, raft::identity_op>,
-  cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<ops::kl_divergence_op<double, double, int>,
-                                                         int,
-                                                         double,
-                                                         double,
-                                                         raft::identity_op>(
-  ops::kl_divergence_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, raft::identity_op>,
-  cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/l1.cuh b/cpp/include/raft/distance/specializations/detail/l1.cuh
deleted file mode 100644
index 870627d909..0000000000
--- a/cpp/include/raft/distance/specializations/detail/l1.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<ops::l1_distance_op<float, float, int>,
-                                                         int,
-                                                         float,
-                                                         float,
-                                                         raft::identity_op>(
-  ops::l1_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, raft::identity_op>,
-  cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<ops::l1_distance_op<double, double, int>,
-                                                         int,
-                                                         double,
-                                                         double,
-                                                         raft::identity_op>(
-  ops::l1_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, raft::identity_op>,
-  cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/l2_expanded.cuh b/cpp/include/raft/distance/specializations/detail/l2_expanded.cuh
deleted file mode 100644
index ee3207bcce..0000000000
--- a/cpp/include/raft/distance/specializations/detail/l2_expanded.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<ops::l2_exp_distance_op<float, float, int>,
-                                                         int,
-                                                         float,
-                                                         float,
-                                                         raft::identity_op>(
-  ops::l2_exp_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, raft::identity_op>,
-  cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::l2_exp_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::l2_exp_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/l2_unexpanded.cuh b/cpp/include/raft/distance/specializations/detail/l2_unexpanded.cuh
deleted file mode 100644
index 1fbf57632b..0000000000
--- a/cpp/include/raft/distance/specializations/detail/l2_unexpanded.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::l2_unexp_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::l2_unexp_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::l2_unexp_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::l2_unexp_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/l_inf.cuh b/cpp/include/raft/distance/specializations/detail/l_inf.cuh
deleted file mode 100644
index 388d3bf439..0000000000
--- a/cpp/include/raft/distance/specializations/detail/l_inf.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<ops::l_inf_distance_op<float, float, int>,
-                                                         int,
-                                                         float,
-                                                         float,
-                                                         raft::identity_op>(
-  ops::l_inf_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, raft::identity_op>,
-  cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::l_inf_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::l_inf_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/lp_unexpanded.cuh b/cpp/include/raft/distance/specializations/detail/lp_unexpanded.cuh
deleted file mode 100644
index d8e86ce6f2..0000000000
--- a/cpp/include/raft/distance/specializations/detail/lp_unexpanded.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::lp_unexp_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::lp_unexp_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::lp_unexp_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::lp_unexp_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/detail/russel_rao.cuh b/cpp/include/raft/distance/specializations/detail/russel_rao.cuh
deleted file mode 100644
index 4803fb8ab0..0000000000
--- a/cpp/include/raft/distance/specializations/detail/russel_rao.cuh
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/distance/detail/distance.cuh>
-
-namespace raft::distance::detail {
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::russel_rao_distance_op<float, float, int>,
-  int,
-  float,
-  float,
-  raft::identity_op>(ops::russel_rao_distance_op<float, float, int>,
-                     pairwise_matrix_params<int, float, float, raft::identity_op>,
-                     cudaStream_t);
-
-extern template void pairwise_matrix_instantiation_point<
-  ops::russel_rao_distance_op<double, double, int>,
-  int,
-  double,
-  double,
-  raft::identity_op>(ops::russel_rao_distance_op<double, double, int>,
-                     pairwise_matrix_params<int, double, double, raft::identity_op>,
-                     cudaStream_t);
-}  // namespace raft::distance::detail
diff --git a/cpp/include/raft/distance/specializations/distance.cuh b/cpp/include/raft/distance/specializations/distance.cuh
index a34f696e9e..ed0b6848ae 100644
--- a/cpp/include/raft/distance/specializations/distance.cuh
+++ b/cpp/include/raft/distance/specializations/distance.cuh
@@ -13,22 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/distance/specializations/detail/canberra.cuh>
-#include <raft/distance/specializations/detail/correlation.cuh>
-#include <raft/distance/specializations/detail/cosine.cuh>
-#include <raft/distance/specializations/detail/hamming_unexpanded.cuh>
-#include <raft/distance/specializations/detail/hellinger_expanded.cuh>
-#include <raft/distance/specializations/detail/inner_product.cuh>
-#include <raft/distance/specializations/detail/jensen_shannon.cuh>
-#include <raft/distance/specializations/detail/kernels.cuh>
-#include <raft/distance/specializations/detail/kl_divergence.cuh>
-#include <raft/distance/specializations/detail/l1.cuh>
-#include <raft/distance/specializations/detail/l2_expanded.cuh>
-#include <raft/distance/specializations/detail/l2_unexpanded.cuh>
-#include <raft/distance/specializations/detail/l_inf.cuh>
-#include <raft/distance/specializations/detail/lp_unexpanded.cuh>
-#include <raft/distance/specializations/detail/russel_rao.cuh>
-#include <raft/distance/specializations/fused_l2_nn_min.cuh>
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/distance/specializations/fused_l2_nn_min.cuh b/cpp/include/raft/distance/specializations/fused_l2_nn_min.cuh
index 88e1216635..9588a7f329 100644
--- a/cpp/include/raft/distance/specializations/fused_l2_nn_min.cuh
+++ b/cpp/include/raft/distance/specializations/fused_l2_nn_min.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,115 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/core/kvp.hpp>
-#include <raft/distance/fused_l2_nn.cuh>
-
-namespace raft {
-namespace distance {
-
-extern template void fusedL2NNMinReduce<float, raft::KeyValuePair<int, float>, int>(
-  raft::KeyValuePair<int, float>* min,
-  const float* x,
-  const float* y,
-  const float* xn,
-  const float* yn,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-extern template void fusedL2NNMinReduce<float, raft::KeyValuePair<int64_t, float>, int64_t>(
-  raft::KeyValuePair<int64_t, float>* min,
-  const float* x,
-  const float* y,
-  const float* xn,
-  const float* yn,
-  int64_t m,
-  int64_t n,
-  int64_t k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-extern template void fusedL2NNMinReduce<double, raft::KeyValuePair<int, double>, int>(
-  raft::KeyValuePair<int, double>* min,
-  const double* x,
-  const double* y,
-  const double* xn,
-  const double* yn,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-extern template void fusedL2NNMinReduce<double, raft::KeyValuePair<int64_t, double>, int64_t>(
-  raft::KeyValuePair<int64_t, double>* min,
-  const double* x,
-  const double* y,
-  const double* xn,
-  const double* yn,
-  int64_t m,
-  int64_t n,
-  int64_t k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-extern template void fusedL2NNMinReduce<float, float, int>(float* min,
-                                                           const float* x,
-                                                           const float* y,
-                                                           const float* xn,
-                                                           const float* yn,
-                                                           int m,
-                                                           int n,
-                                                           int k,
-                                                           void* workspace,
-                                                           bool sqrt,
-                                                           bool initOutBuffer,
-                                                           cudaStream_t stream);
-extern template void fusedL2NNMinReduce<float, float, int64_t>(float* min,
-                                                               const float* x,
-                                                               const float* y,
-                                                               const float* xn,
-                                                               const float* yn,
-                                                               int64_t m,
-                                                               int64_t n,
-                                                               int64_t k,
-                                                               void* workspace,
-                                                               bool sqrt,
-                                                               bool initOutBuffer,
-                                                               cudaStream_t stream);
-extern template void fusedL2NNMinReduce<double, double, int>(double* min,
-                                                             const double* x,
-                                                             const double* y,
-                                                             const double* xn,
-                                                             const double* yn,
-                                                             int m,
-                                                             int n,
-                                                             int k,
-                                                             void* workspace,
-                                                             bool sqrt,
-                                                             bool initOutBuffer,
-                                                             cudaStream_t stream);
-extern template void fusedL2NNMinReduce<double, double, int64_t>(double* min,
-                                                                 const double* x,
-                                                                 const double* y,
-                                                                 const double* xn,
-                                                                 const double* yn,
-                                                                 int64_t m,
-                                                                 int64_t n,
-                                                                 int64_t k,
-                                                                 void* workspace,
-                                                                 bool sqrt,
-                                                                 bool initOutBuffer,
-                                                                 cudaStream_t stream);
-
-}  // namespace distance
-}  // namespace raft
\ No newline at end of file
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh
new file mode 100644
index 0000000000..4800f2e3cf
--- /dev/null
+++ b/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/operators.hpp>
+
+// Explicit instantiation of raft::linalg::detail::coalescedReduction is not forced:
+// there would be too many instances. The macro below, despite its name, only emits
+// `extern template` declarations for the most common instantiations.
+
+#define instantiate_raft_linalg_detail_coalescedReduction(                              \
+  InType, OutType, IdxType, MainLambda, ReduceLambda, FinalLambda)                      \
+  extern template void raft::linalg::detail::coalescedReduction(OutType* dots,          \
+                                                                const InType* data,     \
+                                                                IdxType D,              \
+                                                                IdxType N,              \
+                                                                OutType init,           \
+                                                                cudaStream_t stream,    \
+                                                                bool inplace,           \
+                                                                MainLambda main_op,     \
+                                                                ReduceLambda reduce_op, \
+                                                                FinalLambda final_op)
+
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::identity_op, raft::min_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::sq_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::abs_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::abs_op, raft::max_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::abs_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::abs_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::identity_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::identity_op, raft::min_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::sq_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, long, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::identity_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::abs_op, raft::max_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::sq_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, unsigned int, raft::sq_op, raft::add_op, raft::identity_op);
+
+#undef instantiate_raft_linalg_detail_coalescedReduction  // helper macro must not leak
diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh
new file mode 100644
index 0000000000..5b01196cf4
--- /dev/null
+++ b/cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cub/cub.cuh>
+#include <raft/core/nvtx.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/util/cuda_utils.cuh>
+#include <rmm/device_uvector.hpp>
+
+namespace raft {
+namespace linalg {
+namespace detail {
+
+template <int warpSize, int rpb>
+struct ReductionThinPolicy {  // compile-time launch shape for the "thin" reduction kernel
+  static constexpr int LogicalWarpSize = warpSize;  // block x-dim: threads sharing one row
+  static constexpr int RowsPerBlock    = rpb;       // block y-dim: rows handled per block
+  static constexpr int ThreadsPerBlock = LogicalWarpSize * RowsPerBlock;  // launch-bounds value
+};
+
+template <typename Policy,
+          typename InType,
+          typename OutType,
+          typename IdxType,
+          typename MainLambda,
+          typename ReduceLambda,
+          typename FinalLambda>
+__global__ void __launch_bounds__(Policy::ThreadsPerBlock)
+  coalescedReductionThinKernel(OutType* dots,           // out: dots[i] is the result of row i
+                               const InType* data,      // in: element (i, j) is data[j + D * i]
+                               IdxType D,               // number of elements reduced per row
+                               IdxType N,               // number of rows
+                               OutType init,            // starting value of every accumulator
+                               MainLambda main_op,      // called as main_op(element, column)
+                               ReduceLambda reduce_op,  // combines two partial results
+                               FinalLambda final_op,    // applied to the result before storing
+                               bool inplace = false)    // true: also fold in the old dots[i]
+{
+  IdxType i = threadIdx.y + (Policy::RowsPerBlock * static_cast<IdxType>(blockIdx.x));  // row
+  if (i >= N) return;  // the last block may cover rows past the end
+
+  OutType acc = init;  // every thread starts from init, so init is folded in more than once
+  for (IdxType j = threadIdx.x; j < D; j += Policy::LogicalWarpSize) {  // x-threads stride the row
+    acc = reduce_op(acc, main_op(data[j + (D * i)], j));
+  }
+  acc = raft::logicalWarpReduce<Policy::LogicalWarpSize>(acc, reduce_op);
+  if (threadIdx.x == 0) {  // NOTE(review): presumably holds the fully reduced row - confirm helper
+    if (inplace) {
+      dots[i] = final_op(reduce_op(dots[i], acc));  // previous output joins the reduction
+    } else {
+      dots[i] = final_op(acc);
+    }
+  }
+}
+
+template <typename Policy,
+          typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void coalescedReductionThin(OutType* dots,  // host-side launcher of coalescedReductionThinKernel
+                            const InType* data,
+                            IdxType D,
+                            IdxType N,
+                            OutType init,
+                            cudaStream_t stream,  // stream the kernel is launched on
+                            bool inplace           = false,
+                            MainLambda main_op     = raft::identity_op(),
+                            ReduceLambda reduce_op = raft::add_op(),
+                            FinalLambda final_op   = raft::identity_op())
+{
+  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
+    "coalescedReductionThin<%d,%d>", Policy::LogicalWarpSize, Policy::RowsPerBlock);
+  dim3 threads(Policy::LogicalWarpSize, Policy::RowsPerBlock, 1);  // x: per-row threads, y: rows
+  dim3 blocks(ceildiv<IdxType>(N, Policy::RowsPerBlock), 1, 1);    // 1-D grid sized from N
+  coalescedReductionThinKernel<Policy>  // launched without dynamic shared memory
+    <<<blocks, threads, 0, stream>>>(dots, data, D, N, init, main_op, reduce_op, final_op, inplace);
+  RAFT_CUDA_TRY(cudaPeekAtLastError());  // checks the launch; no synchronization is done here
+}
+
+template <typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void coalescedReductionThinDispatcher(OutType* dots,  // selects the thin policy from D
+                                      const InType* data,
+                                      IdxType D,
+                                      IdxType N,
+                                      OutType init,
+                                      cudaStream_t stream,
+                                      bool inplace           = false,
+                                      MainLambda main_op     = raft::identity_op(),
+                                      ReduceLambda reduce_op = raft::add_op(),
+                                      FinalLambda final_op   = raft::identity_op())
+{
+  if (D <= IdxType(2)) {  // <threads per row, rows per block>; 128 threads in every case
+    coalescedReductionThin<ReductionThinPolicy<2, 64>>(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  } else if (D <= IdxType(4)) {
+    coalescedReductionThin<ReductionThinPolicy<4, 32>>(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  } else if (D <= IdxType(8)) {
+    coalescedReductionThin<ReductionThinPolicy<8, 16>>(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  } else if (D <= IdxType(16)) {
+    coalescedReductionThin<ReductionThinPolicy<16, 8>>(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  } else {  // D > 16: 32 threads per row, 4 rows per block
+    coalescedReductionThin<ReductionThinPolicy<32, 4>>(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  }
+}
+
+template <int TPB,
+          typename InType,
+          typename OutType,
+          typename IdxType,
+          typename MainLambda,
+          typename ReduceLambda,
+          typename FinalLambda>
+__global__ void __launch_bounds__(TPB) coalescedReductionMediumKernel(OutType* dots,
+                                                                      const InType* data,
+                                                                      IdxType D,
+                                                                      IdxType N,  // not read here
+                                                                      OutType init,
+                                                                      MainLambda main_op,
+                                                                      ReduceLambda reduce_op,
+                                                                      FinalLambda final_op,
+                                                                      bool inplace = false)
+{  // One block of TPB threads reduces row blockIdx.x of `data` into dots[blockIdx.x].
+  typedef cub::BlockReduce<OutType, TPB, cub::BLOCK_REDUCE_RAKING> BlockReduce;
+  __shared__ typename BlockReduce::TempStorage temp_storage;  // scratch used by the block reduce
+  OutType thread_data = init;                                 // per-thread partial result
+  IdxType rowStart    = blockIdx.x * D;                       // offset of this block's row
+  for (IdxType i = threadIdx.x; i < D; i += TPB) {            // threads stride the row by TPB
+    IdxType idx = rowStart + i;
+    thread_data = reduce_op(thread_data, main_op(data[idx], i));
+  }
+  OutType acc = BlockReduce(temp_storage).Reduce(thread_data, reduce_op);  // valid in thread 0
+  if (threadIdx.x == 0) {
+    if (inplace) {
+      dots[blockIdx.x] = final_op(reduce_op(dots[blockIdx.x], acc));  // fold in the old output
+    } else {
+      dots[blockIdx.x] = final_op(acc);
+    }
+  }
+}
+
+template <int TPB,
+          typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void coalescedReductionMedium(OutType* dots,  // host-side launcher of the medium kernel
+                              const InType* data,
+                              IdxType D,
+                              IdxType N,
+                              OutType init,
+                              cudaStream_t stream,  // stream the kernel is launched on
+                              bool inplace           = false,
+                              MainLambda main_op     = raft::identity_op(),
+                              ReduceLambda reduce_op = raft::add_op(),
+                              FinalLambda final_op   = raft::identity_op())
+{
+  common::nvtx::range<common::nvtx::domain::raft> fun_scope("coalescedReductionMedium<%d>", TPB);
+  coalescedReductionMediumKernel<TPB>  // grid of N blocks (one per row), TPB threads each
+    <<<N, TPB, 0, stream>>>(dots, data, D, N, init, main_op, reduce_op, final_op, inplace);
+  RAFT_CUDA_TRY(cudaPeekAtLastError());  // checks the launch; no synchronization is done here
+}
+
+template <typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void coalescedReductionMediumDispatcher(OutType* dots,  // fixes the medium block size
+                                        const InType* data,
+                                        IdxType D,
+                                        IdxType N,
+                                        OutType init,
+                                        cudaStream_t stream,
+                                        bool inplace           = false,
+                                        MainLambda main_op     = raft::identity_op(),
+                                        ReduceLambda reduce_op = raft::add_op(),
+                                        FinalLambda final_op   = raft::identity_op())
+{
+  // Note: for now, this kernel is only used when D > 256. If this changes in the future, use
+  // smaller block sizes when relevant.
+  coalescedReductionMedium<256>(  // TPB = 256 threads per block, one block per row
+    dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+}
+
+template <int tpb, int bpr>
+struct ReductionThickPolicy {  // compile-time launch shape for the "thick" reduction kernel
+  static constexpr int ThreadsPerBlock = tpb;        // threads per block (block x-dim)
+  static constexpr int BlocksPerRow    = bpr;        // blocks sharing one row (grid y-dim)
+  static constexpr int BlockStride     = tpb * bpr;  // column step of the kernel's loop
+};
+
+template <typename Policy,
+          typename InType,
+          typename OutType,
+          typename IdxType,
+          typename MainLambda,
+          typename ReduceLambda>
+__global__ void __launch_bounds__(Policy::ThreadsPerBlock)
+  coalescedReductionThickKernel(OutType* buffer,         // out: BlocksPerRow partials per row
+                                const InType* data,      // in: row r starts at data[r * D]
+                                IdxType D,               // number of elements per row
+                                IdxType N,               // not read in this kernel
+                                OutType init,            // starting value of every accumulator
+                                MainLambda main_op,      // called as main_op(element, column)
+                                ReduceLambda reduce_op)  // combines two partial results
+{
+  typedef cub::BlockReduce<OutType, Policy::ThreadsPerBlock, cub::BLOCK_REDUCE_RAKING> BlockReduce;
+  __shared__ typename BlockReduce::TempStorage temp_storage;  // scratch used by the block reduce
+  OutType thread_data = init;                                 // per-thread partial result
+  IdxType rowStart    = blockIdx.x * D;                       // blockIdx.x selects the row
+  for (IdxType i = blockIdx.y * Policy::ThreadsPerBlock + threadIdx.x; i < D;
+       i += Policy::BlockStride) {  // skips the columns taken by the row's other blocks
+    IdxType idx = rowStart + i;
+    thread_data = reduce_op(thread_data, main_op(data[idx], i));
+  }
+  OutType acc = BlockReduce(temp_storage).Reduce(thread_data, reduce_op);  // valid in thread 0
+  if (threadIdx.x == 0) { buffer[Policy::BlocksPerRow * blockIdx.x + blockIdx.y] = acc; }
+}
+
+template <typename ThickPolicy,
+          typename ThinPolicy,
+          typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void coalescedReductionThick(OutType* dots,  // two-pass reduction: thick kernel, then thin kernel
+                             const InType* data,
+                             IdxType D,
+                             IdxType N,
+                             OutType init,
+                             cudaStream_t stream,  // both passes are launched on this stream
+                             bool inplace           = false,
+                             MainLambda main_op     = raft::identity_op(),
+                             ReduceLambda reduce_op = raft::add_op(),
+                             FinalLambda final_op   = raft::identity_op())
+{
+  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
+    "coalescedReductionThick<%d,%d>", ThickPolicy::ThreadsPerBlock, ThickPolicy::BlocksPerRow);
+
+  dim3 threads(ThickPolicy::ThreadsPerBlock, 1, 1);
+  dim3 blocks(N, ThickPolicy::BlocksPerRow, 1);  // x: one entry per row, y: blocks sharing a row
+
+  rmm::device_uvector<OutType> buffer(N * ThickPolicy::BlocksPerRow, stream);  // pass-1 partials
+
+  /* We apply a two-step reduction:
+   *  1. coalescedReductionThickKernel reduces the [N x D] input data to [N x BlocksPerRow]. It
+   *     applies the main_op but not the final op.
+   *  2. coalescedReductionThinKernel reduces [N x BlocksPerRow] to [N x 1]. It doesn't apply any
+   *     main_op but applies final_op. If in-place, the existing and new values are reduced.
+   */
+
+  coalescedReductionThickKernel<ThickPolicy>
+    <<<blocks, threads, 0, stream>>>(buffer.data(), data, D, N, init, main_op, reduce_op);
+  RAFT_CUDA_TRY(cudaPeekAtLastError());  // checks the first launch before queuing the second
+
+  coalescedReductionThin<ThinPolicy>(dots,
+                                     buffer.data(),  // the partials are the input of pass 2
+                                     static_cast<IdxType>(ThickPolicy::BlocksPerRow),  // new D
+                                     N,
+                                     init,  // init seeds the accumulators of pass 2 as well
+                                     stream,
+                                     inplace,
+                                     raft::identity_op(),  // main_op already ran in pass 1
+                                     reduce_op,
+                                     final_op);
+}
+
+template <typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void coalescedReductionThickDispatcher(OutType* dots,  // selects the thick policy from D
+                                       const InType* data,
+                                       IdxType D,
+                                       IdxType N,
+                                       OutType init,
+                                       cudaStream_t stream,
+                                       bool inplace           = false,
+                                       MainLambda main_op     = raft::identity_op(),
+                                       ReduceLambda reduce_op = raft::add_op(),
+                                       FinalLambda final_op   = raft::identity_op())
+{
+  // Note: multiple elements per thread to take advantage of the sequential reduction and loop
+  // unrolling
+  if (D < IdxType(32768)) {  // 256 threads x 32 blocks per row; second pass uses 32 threads/row
+    coalescedReductionThick<ReductionThickPolicy<256, 32>, ReductionThinPolicy<32, 4>>(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  } else {  // D >= 32768: 64 blocks per row instead of 32
+    coalescedReductionThick<ReductionThickPolicy<256, 64>, ReductionThinPolicy<32, 4>>(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  }
+}
+
+// Primitive to perform reductions along the coalesced dimension of the matrix, i.e. reduce along
+// rows for row major or reduce along columns for column major layout. If `inplace` is set, the
+// result is combined with the existing values of dots using reduce_op before final_op is applied.
+template <typename InType,
+          typename OutType      = InType,
+          typename IdxType      = int,
+          typename MainLambda   = raft::identity_op,
+          typename ReduceLambda = raft::add_op,
+          typename FinalLambda  = raft::identity_op>
+void coalescedReduction(OutType* dots,        // out: one value per reduced row (N in total)
+                        const InType* data,   // in: N rows of D contiguous elements
+                        IdxType D,            // length of the reduced (contiguous) dimension
+                        IdxType N,            // number of rows, i.e. number of outputs
+                        OutType init,         // starting value of the accumulators
+                        cudaStream_t stream,  // stream all kernels are launched on
+                        bool inplace           = false,
+                        MainLambda main_op     = raft::identity_op(),
+                        ReduceLambda reduce_op = raft::add_op(),
+                        FinalLambda final_op   = raft::identity_op())
+{
+  /* The primitive selects one of three implementations based on heuristics:
+   *  - Thin: very efficient when D is small and/or N is large
+   *  - Thick: used when N is very small and D very large
+   *  - Medium: used when N is too small to fill the GPU with the thin kernel
+   */
+  const IdxType numSMs = raft::getMultiProcessorCount();  // presumably SMs of the current device
+  if (D <= IdxType(256) || N >= IdxType(4) * numSMs) {  // short rows, or at least 4 rows per SM
+    coalescedReductionThinDispatcher(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  } else if (N < numSMs && D >= IdxType(16384)) {  // fewer rows than SMs and very long rows
+    coalescedReductionThickDispatcher(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  } else {  // everything in between
+    coalescedReductionMediumDispatcher(
+      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
+  }
+}
+
+}  // namespace detail
+}  // namespace linalg
+}  // namespace raft
diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh
index 238e17fa56..3e6b17978b 100644
--- a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh
+++ b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,353 +16,11 @@
 
 #pragma once
 
-#include <cub/cub.cuh>
-#include <raft/common/nvtx.hpp>
-#include <raft/core/operators.hpp>
-#include <raft/util/cuda_utils.cuh>
-#include <rmm/device_uvector.hpp>
+// Always include inline definitions of coalesced reduction, because we do not
+// force explicit instantiation.
+#include "coalesced_reduction-inl.cuh"
 
-namespace raft {
-namespace linalg {
-namespace detail {
-
-template <int warpSize, int rpb>
-struct ReductionThinPolicy {
-  static constexpr int LogicalWarpSize = warpSize;
-  static constexpr int RowsPerBlock    = rpb;
-  static constexpr int ThreadsPerBlock = LogicalWarpSize * RowsPerBlock;
-};
-
-template <typename Policy,
-          typename InType,
-          typename OutType,
-          typename IdxType,
-          typename MainLambda,
-          typename ReduceLambda,
-          typename FinalLambda>
-__global__ void __launch_bounds__(Policy::ThreadsPerBlock)
-  coalescedReductionThinKernel(OutType* dots,
-                               const InType* data,
-                               IdxType D,
-                               IdxType N,
-                               OutType init,
-                               MainLambda main_op,
-                               ReduceLambda reduce_op,
-                               FinalLambda final_op,
-                               bool inplace = false)
-{
-  IdxType i = threadIdx.y + (Policy::RowsPerBlock * static_cast<IdxType>(blockIdx.x));
-  if (i >= N) return;
-
-  OutType acc = init;
-  for (IdxType j = threadIdx.x; j < D; j += Policy::LogicalWarpSize) {
-    acc = reduce_op(acc, main_op(data[j + (D * i)], j));
-  }
-  acc = raft::logicalWarpReduce<Policy::LogicalWarpSize>(acc, reduce_op);
-  if (threadIdx.x == 0) {
-    if (inplace) {
-      dots[i] = final_op(reduce_op(dots[i], acc));
-    } else {
-      dots[i] = final_op(acc);
-    }
-  }
-}
-
-template <typename Policy,
-          typename InType,
-          typename OutType      = InType,
-          typename IdxType      = int,
-          typename MainLambda   = raft::identity_op,
-          typename ReduceLambda = raft::add_op,
-          typename FinalLambda  = raft::identity_op>
-void coalescedReductionThin(OutType* dots,
-                            const InType* data,
-                            IdxType D,
-                            IdxType N,
-                            OutType init,
-                            cudaStream_t stream,
-                            bool inplace           = false,
-                            MainLambda main_op     = raft::identity_op(),
-                            ReduceLambda reduce_op = raft::add_op(),
-                            FinalLambda final_op   = raft::identity_op())
-{
-  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
-    "coalescedReductionThin<%d,%d>", Policy::LogicalWarpSize, Policy::RowsPerBlock);
-  dim3 threads(Policy::LogicalWarpSize, Policy::RowsPerBlock, 1);
-  dim3 blocks(ceildiv<IdxType>(N, Policy::RowsPerBlock), 1, 1);
-  coalescedReductionThinKernel<Policy>
-    <<<blocks, threads, 0, stream>>>(dots, data, D, N, init, main_op, reduce_op, final_op, inplace);
-  RAFT_CUDA_TRY(cudaPeekAtLastError());
-}
-
-template <typename InType,
-          typename OutType      = InType,
-          typename IdxType      = int,
-          typename MainLambda   = raft::identity_op,
-          typename ReduceLambda = raft::add_op,
-          typename FinalLambda  = raft::identity_op>
-void coalescedReductionThinDispatcher(OutType* dots,
-                                      const InType* data,
-                                      IdxType D,
-                                      IdxType N,
-                                      OutType init,
-                                      cudaStream_t stream,
-                                      bool inplace           = false,
-                                      MainLambda main_op     = raft::identity_op(),
-                                      ReduceLambda reduce_op = raft::add_op(),
-                                      FinalLambda final_op   = raft::identity_op())
-{
-  if (D <= IdxType(2)) {
-    coalescedReductionThin<ReductionThinPolicy<2, 64>>(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  } else if (D <= IdxType(4)) {
-    coalescedReductionThin<ReductionThinPolicy<4, 32>>(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  } else if (D <= IdxType(8)) {
-    coalescedReductionThin<ReductionThinPolicy<8, 16>>(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  } else if (D <= IdxType(16)) {
-    coalescedReductionThin<ReductionThinPolicy<16, 8>>(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  } else {
-    coalescedReductionThin<ReductionThinPolicy<32, 4>>(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  }
-}
-
-template <int TPB,
-          typename InType,
-          typename OutType,
-          typename IdxType,
-          typename MainLambda,
-          typename ReduceLambda,
-          typename FinalLambda>
-__global__ void __launch_bounds__(TPB) coalescedReductionMediumKernel(OutType* dots,
-                                                                      const InType* data,
-                                                                      IdxType D,
-                                                                      IdxType N,
-                                                                      OutType init,
-                                                                      MainLambda main_op,
-                                                                      ReduceLambda reduce_op,
-                                                                      FinalLambda final_op,
-                                                                      bool inplace = false)
-{
-  typedef cub::BlockReduce<OutType, TPB, cub::BLOCK_REDUCE_RAKING> BlockReduce;
-  __shared__ typename BlockReduce::TempStorage temp_storage;
-  OutType thread_data = init;
-  IdxType rowStart    = blockIdx.x * D;
-  for (IdxType i = threadIdx.x; i < D; i += TPB) {
-    IdxType idx = rowStart + i;
-    thread_data = reduce_op(thread_data, main_op(data[idx], i));
-  }
-  OutType acc = BlockReduce(temp_storage).Reduce(thread_data, reduce_op);
-  if (threadIdx.x == 0) {
-    if (inplace) {
-      dots[blockIdx.x] = final_op(reduce_op(dots[blockIdx.x], acc));
-    } else {
-      dots[blockIdx.x] = final_op(acc);
-    }
-  }
-}
-
-template <int TPB,
-          typename InType,
-          typename OutType      = InType,
-          typename IdxType      = int,
-          typename MainLambda   = raft::identity_op,
-          typename ReduceLambda = raft::add_op,
-          typename FinalLambda  = raft::identity_op>
-void coalescedReductionMedium(OutType* dots,
-                              const InType* data,
-                              IdxType D,
-                              IdxType N,
-                              OutType init,
-                              cudaStream_t stream,
-                              bool inplace           = false,
-                              MainLambda main_op     = raft::identity_op(),
-                              ReduceLambda reduce_op = raft::add_op(),
-                              FinalLambda final_op   = raft::identity_op())
-{
-  common::nvtx::range<common::nvtx::domain::raft> fun_scope("coalescedReductionMedium<%d>", TPB);
-  coalescedReductionMediumKernel<TPB>
-    <<<N, TPB, 0, stream>>>(dots, data, D, N, init, main_op, reduce_op, final_op, inplace);
-  RAFT_CUDA_TRY(cudaPeekAtLastError());
-}
-
-template <typename InType,
-          typename OutType      = InType,
-          typename IdxType      = int,
-          typename MainLambda   = raft::identity_op,
-          typename ReduceLambda = raft::add_op,
-          typename FinalLambda  = raft::identity_op>
-void coalescedReductionMediumDispatcher(OutType* dots,
-                                        const InType* data,
-                                        IdxType D,
-                                        IdxType N,
-                                        OutType init,
-                                        cudaStream_t stream,
-                                        bool inplace           = false,
-                                        MainLambda main_op     = raft::identity_op(),
-                                        ReduceLambda reduce_op = raft::add_op(),
-                                        FinalLambda final_op   = raft::identity_op())
-{
-  // Note: for now, this kernel is only used when D > 256. If this changes in the future, use
-  // smaller block sizes when relevant.
-  coalescedReductionMedium<256>(
-    dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-}
-
-template <int tpb, int bpr>
-struct ReductionThickPolicy {
-  static constexpr int ThreadsPerBlock = tpb;
-  static constexpr int BlocksPerRow    = bpr;
-  static constexpr int BlockStride     = tpb * bpr;
-};
-
-template <typename Policy,
-          typename InType,
-          typename OutType,
-          typename IdxType,
-          typename MainLambda,
-          typename ReduceLambda>
-__global__ void __launch_bounds__(Policy::ThreadsPerBlock)
-  coalescedReductionThickKernel(OutType* buffer,
-                                const InType* data,
-                                IdxType D,
-                                IdxType N,
-                                OutType init,
-                                MainLambda main_op,
-                                ReduceLambda reduce_op)
-{
-  typedef cub::BlockReduce<OutType, Policy::ThreadsPerBlock, cub::BLOCK_REDUCE_RAKING> BlockReduce;
-  __shared__ typename BlockReduce::TempStorage temp_storage;
-  OutType thread_data = init;
-  IdxType rowStart    = blockIdx.x * D;
-  for (IdxType i = blockIdx.y * Policy::ThreadsPerBlock + threadIdx.x; i < D;
-       i += Policy::BlockStride) {
-    IdxType idx = rowStart + i;
-    thread_data = reduce_op(thread_data, main_op(data[idx], i));
-  }
-  OutType acc = BlockReduce(temp_storage).Reduce(thread_data, reduce_op);
-  if (threadIdx.x == 0) { buffer[Policy::BlocksPerRow * blockIdx.x + blockIdx.y] = acc; }
-}
-
-template <typename ThickPolicy,
-          typename ThinPolicy,
-          typename InType,
-          typename OutType      = InType,
-          typename IdxType      = int,
-          typename MainLambda   = raft::identity_op,
-          typename ReduceLambda = raft::add_op,
-          typename FinalLambda  = raft::identity_op>
-void coalescedReductionThick(OutType* dots,
-                             const InType* data,
-                             IdxType D,
-                             IdxType N,
-                             OutType init,
-                             cudaStream_t stream,
-                             bool inplace           = false,
-                             MainLambda main_op     = raft::identity_op(),
-                             ReduceLambda reduce_op = raft::add_op(),
-                             FinalLambda final_op   = raft::identity_op())
-{
-  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
-    "coalescedReductionThick<%d,%d>", ThickPolicy::ThreadsPerBlock, ThickPolicy::BlocksPerRow);
-
-  dim3 threads(ThickPolicy::ThreadsPerBlock, 1, 1);
-  dim3 blocks(N, ThickPolicy::BlocksPerRow, 1);
-
-  rmm::device_uvector<OutType> buffer(N * ThickPolicy::BlocksPerRow, stream);
-
-  /* We apply a two-step reduction:
-   *  1. coalescedReductionThickKernel reduces the [N x D] input data to [N x BlocksPerRow]. It
-   *     applies the main_op but not the final op.
-   *  2. coalescedReductionThinKernel reduces [N x BlocksPerRow] to [N x 1]. It doesn't apply any
-   *     main_op but applies final_op. If in-place, the existing and new values are reduced.
-   */
-
-  coalescedReductionThickKernel<ThickPolicy>
-    <<<blocks, threads, 0, stream>>>(buffer.data(), data, D, N, init, main_op, reduce_op);
-  RAFT_CUDA_TRY(cudaPeekAtLastError());
-
-  coalescedReductionThin<ThinPolicy>(dots,
-                                     buffer.data(),
-                                     static_cast<IdxType>(ThickPolicy::BlocksPerRow),
-                                     N,
-                                     init,
-                                     stream,
-                                     inplace,
-                                     raft::identity_op(),
-                                     reduce_op,
-                                     final_op);
-}
-
-template <typename InType,
-          typename OutType      = InType,
-          typename IdxType      = int,
-          typename MainLambda   = raft::identity_op,
-          typename ReduceLambda = raft::add_op,
-          typename FinalLambda  = raft::identity_op>
-void coalescedReductionThickDispatcher(OutType* dots,
-                                       const InType* data,
-                                       IdxType D,
-                                       IdxType N,
-                                       OutType init,
-                                       cudaStream_t stream,
-                                       bool inplace           = false,
-                                       MainLambda main_op     = raft::identity_op(),
-                                       ReduceLambda reduce_op = raft::add_op(),
-                                       FinalLambda final_op   = raft::identity_op())
-{
-  // Note: multiple elements per thread to take advantage of the sequential reduction and loop
-  // unrolling
-  if (D < IdxType(32768)) {
-    coalescedReductionThick<ReductionThickPolicy<256, 32>, ReductionThinPolicy<32, 4>>(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  } else {
-    coalescedReductionThick<ReductionThickPolicy<256, 64>, ReductionThinPolicy<32, 4>>(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  }
-}
-
-// Primitive to perform reductions along the coalesced dimension of the matrix, i.e. reduce along
-// rows for row major or reduce along columns for column major layout. Can do an inplace reduction
-// adding to original values of dots if requested.
-template <typename InType,
-          typename OutType      = InType,
-          typename IdxType      = int,
-          typename MainLambda   = raft::identity_op,
-          typename ReduceLambda = raft::add_op,
-          typename FinalLambda  = raft::identity_op>
-void coalescedReduction(OutType* dots,
-                        const InType* data,
-                        IdxType D,
-                        IdxType N,
-                        OutType init,
-                        cudaStream_t stream,
-                        bool inplace           = false,
-                        MainLambda main_op     = raft::identity_op(),
-                        ReduceLambda reduce_op = raft::add_op(),
-                        FinalLambda final_op   = raft::identity_op())
-{
-  /* The primitive selects one of three implementations based on heuristics:
-   *  - Thin: very efficient when D is small and/or N is large
-   *  - Thick: used when N is very small and D very large
-   *  - Medium: used when N is too small to fill the GPU with the thin kernel
-   */
-  const IdxType numSMs = raft::getMultiProcessorCount();
-  if (D <= IdxType(256) || N >= IdxType(4) * numSMs) {
-    coalescedReductionThinDispatcher(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  } else if (N < numSMs && D >= IdxType(16384)) {
-    coalescedReductionThickDispatcher(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  } else {
-    coalescedReductionMediumDispatcher(
-      dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
-  }
-}
-
-}  // namespace detail
-}  // namespace linalg
-}  // namespace raft
\ No newline at end of file
+// Do include the extern template instantiations when possible.
+#ifdef RAFT_COMPILED
+#include "coalesced_reduction-ext.cuh"
+#endif
diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh
new file mode 100644
index 0000000000..2b233c156d
--- /dev/null
+++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>                                   // uint32_t
+#include <cuda_fp16.h>                               // __half
+#include <raft/util/raft_explicit.hpp>               // RAFT_EXPLICIT
+#include <rmm/cuda_stream_view.hpp>                  // rmm::cuda_stream_view
+#include <rmm/mr/device/device_memory_resource.hpp>  // rmm::mr::device_memory_resource
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::matrix::detail {
+
+template <typename T, typename IdxT>
+void select_k(const T* in_val,
+              const IdxT* in_idx,
+              size_t batch_size,
+              size_t len,
+              int k,
+              T* out_val,
+              IdxT* out_idx,
+              bool select_min,
+              rmm::cuda_stream_view stream,
+              rmm::mr::device_memory_resource* mr = nullptr) RAFT_EXPLICIT;
+}  // namespace raft::matrix::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                            \
+  extern template void raft::matrix::detail::select_k(const T* in_val,              \
+                                                      const IdxT* in_idx,           \
+                                                      size_t batch_size,            \
+                                                      size_t len,                   \
+                                                      int k,                        \
+                                                      T* out_val,                   \
+                                                      IdxT* out_idx,                \
+                                                      bool select_min,              \
+                                                      rmm::cuda_stream_view stream, \
+                                                      rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(__half, uint32_t);
+instantiate_raft_matrix_detail_select_k(__half, int64_t);
+instantiate_raft_matrix_detail_select_k(float, int64_t);
+instantiate_raft_matrix_detail_select_k(float, uint32_t);
+// We did not have these two for double before, but there are tests for them. We
+// therefore include them here.
+instantiate_raft_matrix_detail_select_k(double, int64_t);
+instantiate_raft_matrix_detail_select_k(double, uint32_t);
+
+#undef instantiate_raft_matrix_detail_select_k
diff --git a/cpp/include/raft/matrix/detail/select_k-inl.cuh b/cpp/include/raft/matrix/detail/select_k-inl.cuh
new file mode 100644
index 0000000000..20c2fb119d
--- /dev/null
+++ b/cpp/include/raft/matrix/detail/select_k-inl.cuh
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "select_radix.cuh"
+#include "select_warpsort.cuh"
+
+#include <raft/core/nvtx.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+
+namespace raft::matrix::detail {
+
+/**
+ * Select k smallest or largest key/values from each row in the input data.
+ *
+ * If you think of the input data `in_val` as a row-major matrix with `len` columns and
+ * `batch_size` rows, then this function selects `k` smallest/largest values in each row and fills
+ * in the row-major matrix `out_val` of size (batch_size, k).
+ *
+ * @tparam T
+ *   the type of the keys (what is being compared).
+ * @tparam IdxT
+ *   the index type (what is being selected together with the keys).
+ *
+ * @param[in] in_val
+ *   contiguous device array of inputs of size (len * batch_size);
+ *   these are compared and selected.
+ * @param[in] in_idx
+ *   contiguous device array of inputs of size (len * batch_size);
+ *   typically, these are indices of the corresponding in_val.
+ * @param batch_size
+ *   number of input rows, i.e. the batch size.
+ * @param len
+ *   length of a single input array (row); also sometimes referred as n_cols.
+ *   Invariant: len >= k.
+ * @param k
+ *   the number of outputs to select in each input row.
+ * @param[out] out_val
+ *   contiguous device array of outputs of size (k * batch_size);
+ *   the k smallest/largest values from each row of the `in_val`.
+ * @param[out] out_idx
+ *   contiguous device array of outputs of size (k * batch_size);
+ *   the payload selected together with `out_val`.
+ * @param select_min
+ *   whether to select k smallest (true) or largest (false) keys.
+ * @param stream
+ * @param mr an optional memory resource to use across the calls (you can provide a large enough
+ *           memory pool here to avoid memory allocations within the call).
+ */
+template <typename T, typename IdxT>
+void select_k(const T* in_val,
+              const IdxT* in_idx,
+              size_t batch_size,
+              size_t len,
+              int k,
+              T* out_val,
+              IdxT* out_idx,
+              bool select_min,
+              rmm::cuda_stream_view stream,
+              rmm::mr::device_memory_resource* mr = nullptr)
+{
+  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
+    "matrix::select_k(batch_size = %zu, len = %zu, k = %d)", batch_size, len, k);
+  // TODO (achirkin): investigate the trade-off for a wider variety of inputs.
+  const bool radix_faster = batch_size >= 64 && len >= 102400 && k >= 128;
+  if (k <= select::warpsort::kMaxCapacity && !radix_faster) {
+    select::warpsort::select_k<T, IdxT>(
+      in_val, in_idx, batch_size, len, k, out_val, out_idx, select_min, stream, mr);
+  } else {
+    select::radix::select_k<T, IdxT, (sizeof(T) >= 4 ? 11 : 8), 512>(
+      in_val, in_idx, batch_size, len, k, out_val, out_idx, select_min, true, stream, mr);
+  }
+}
+
+}  // namespace raft::matrix::detail
diff --git a/cpp/include/raft/matrix/detail/select_k.cuh b/cpp/include/raft/matrix/detail/select_k.cuh
index 20c2fb119d..711169984b 100644
--- a/cpp/include/raft/matrix/detail/select_k.cuh
+++ b/cpp/include/raft/matrix/detail/select_k.cuh
@@ -13,79 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include "select_radix.cuh"
-#include "select_warpsort.cuh"
-
-#include <raft/core/nvtx.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/device_memory_resource.hpp>
-
-namespace raft::matrix::detail {
-
-/**
- * Select k smallest or largest key/values from each row in the input data.
- *
- * If you think of the input data `in_val` as a row-major matrix with `len` columns and
- * `batch_size` rows, then this function selects `k` smallest/largest values in each row and fills
- * in the row-major matrix `out_val` of size (batch_size, k).
- *
- * @tparam T
- *   the type of the keys (what is being compared).
- * @tparam IdxT
- *   the index type (what is being selected together with the keys).
- *
- * @param[in] in_val
- *   contiguous device array of inputs of size (len * batch_size);
- *   these are compared and selected.
- * @param[in] in_idx
- *   contiguous device array of inputs of size (len * batch_size);
- *   typically, these are indices of the corresponding in_val.
- * @param batch_size
- *   number of input rows, i.e. the batch size.
- * @param len
- *   length of a single input array (row); also sometimes referred as n_cols.
- *   Invariant: len >= k.
- * @param k
- *   the number of outputs to select in each input row.
- * @param[out] out_val
- *   contiguous device array of outputs of size (k * batch_size);
- *   the k smallest/largest values from each row of the `in_val`.
- * @param[out] out_idx
- *   contiguous device array of outputs of size (k * batch_size);
- *   the payload selected together with `out_val`.
- * @param select_min
- *   whether to select k smallest (true) or largest (false) keys.
- * @param stream
- * @param mr an optional memory resource to use across the calls (you can provide a large enough
- *           memory pool here to avoid memory allocations within the call).
- */
-template <typename T, typename IdxT>
-void select_k(const T* in_val,
-              const IdxT* in_idx,
-              size_t batch_size,
-              size_t len,
-              int k,
-              T* out_val,
-              IdxT* out_idx,
-              bool select_min,
-              rmm::cuda_stream_view stream,
-              rmm::mr::device_memory_resource* mr = nullptr)
-{
-  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
-    "matrix::select_k(batch_size = %zu, len = %zu, k = %d)", batch_size, len, k);
-  // TODO (achirkin): investigate the trade-off for a wider variety of inputs.
-  const bool radix_faster = batch_size >= 64 && len >= 102400 && k >= 128;
-  if (k <= select::warpsort::kMaxCapacity && !radix_faster) {
-    select::warpsort::select_k<T, IdxT>(
-      in_val, in_idx, batch_size, len, k, out_val, out_idx, select_min, stream, mr);
-  } else {
-    select::radix::select_k<T, IdxT, (sizeof(T) >= 4 ? 11 : 8), 512>(
-      in_val, in_idx, batch_size, len, k, out_val, out_idx, select_min, true, stream, mr);
-  }
-}
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "select_k-inl.cuh"
+#endif
 
-}  // namespace raft::matrix::detail
+#ifdef RAFT_COMPILED
+#include "select_k-ext.cuh"
+#endif
diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh
index 93d405da48..c19e9391ce 100644
--- a/cpp/include/raft/matrix/detail/select_warpsort.cuh
+++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh
@@ -27,7 +27,7 @@
 #include <functional>
 #include <type_traits>
 
-#include <rmm/device_vector.hpp>
+#include <rmm/device_uvector.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 
 /*
diff --git a/cpp/include/raft/matrix/specializations.cuh b/cpp/include/raft/matrix/specializations.cuh
index 07bdeab507..ac3b80e8d9 100644
--- a/cpp/include/raft/matrix/specializations.cuh
+++ b/cpp/include/raft/matrix/specializations.cuh
@@ -13,7 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/matrix/specializations/detail/select_k.cuh>
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/matrix/specializations/detail/select_k.cuh b/cpp/include/raft/matrix/specializations/detail/select_k.cuh
index 3cb1a2d8dc..ac3b80e8d9 100644
--- a/cpp/include/raft/matrix/specializations/detail/select_k.cuh
+++ b/cpp/include/raft/matrix/specializations/detail/select_k.cuh
@@ -13,35 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/matrix/detail/select_k.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::matrix::detail {
-
-#define RAFT_INST(T, IdxT)                                      \
-  extern template void select_k<T, IdxT>(const T*,              \
-                                         const IdxT*,           \
-                                         size_t,                \
-                                         size_t,                \
-                                         int,                   \
-                                         T*,                    \
-                                         IdxT*,                 \
-                                         bool,                  \
-                                         rmm::cuda_stream_view, \
-                                         rmm::mr::device_memory_resource*);
-
-// Commonly used types
-RAFT_INST(float, int64_t);
-RAFT_INST(half, int64_t);
-
-// These instances are used in the ivf_pq::search parameterized by the internal_distance_dtype
-RAFT_INST(float, uint32_t);
-RAFT_INST(half, uint32_t);
-
-#undef RAFT_INST
-
-}  // namespace raft::matrix::detail
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/ball_cover-ext.cuh b/cpp/include/raft/neighbors/ball_cover-ext.cuh
new file mode 100644
index 0000000000..b6ab12d8e1
--- /dev/null
+++ b/cpp/include/raft/neighbors/ball_cover-ext.cuh
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cstdint>                              // uint32_t
+#include <raft/distance/distance_types.hpp>     // raft::distance::DistanceType
+#include <raft/neighbors/ball_cover_types.hpp>  // BallCoverIndex
+#include <raft/util/raft_explicit.hpp>          // RAFT_EXPLICIT
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::neighbors::ball_cover {
+
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void build_index(raft::device_resources const& handle,
+                 BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index) RAFT_EXPLICIT;
+
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void all_knn_query(raft::device_resources const& handle,
+                   BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
+                   int_t k,
+                   idx_t* inds,
+                   value_t* dists,
+                   bool perform_post_filtering = true,
+                   float weight                = 1.0) RAFT_EXPLICIT;
+
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void all_knn_query(raft::device_resources const& handle,
+                   BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
+                   raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
+                   raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
+                   int_t k,
+                   bool perform_post_filtering = true,
+                   float weight                = 1.0) RAFT_EXPLICIT;
+
+template <typename idx_t, typename value_t, typename int_t>
+void knn_query(raft::device_resources const& handle,
+               const BallCoverIndex<idx_t, value_t, int_t>& index,
+               int_t k,
+               const value_t* query,
+               int_t n_query_pts,
+               idx_t* inds,
+               value_t* dists,
+               bool perform_post_filtering = true,
+               float weight                = 1.0) RAFT_EXPLICIT;
+
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void knn_query(raft::device_resources const& handle,
+               const BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
+               raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,
+               raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
+               raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
+               int_t k,
+               bool perform_post_filtering = true,
+               float weight                = 1.0) RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors::ball_cover
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_neighbors_ball_cover(idx_t, value_t, int_t, matrix_idx_t)                 \
+  extern template void                                                                             \
+  raft::neighbors::ball_cover::build_index<idx_t, value_t, int_t, matrix_idx_t>(                   \
+    raft::device_resources const& handle,                                                          \
+    raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index);      \
+                                                                                                   \
+  extern template void                                                                             \
+  raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(                 \
+    raft::device_resources const& handle,                                                          \
+    raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
+    int_t k,                                                                                       \
+    idx_t* inds,                                                                                   \
+    value_t* dists,                                                                                \
+    bool perform_post_filtering,                                                                   \
+    float weight);                                                                                 \
+                                                                                                   \
+  extern template void                                                                             \
+  raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(                 \
+    raft::device_resources const& handle,                                                          \
+    raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
+    raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
+    raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,                              \
+    int_t k,                                                                                       \
+    bool perform_post_filtering,                                                                   \
+    float weight);                                                                                 \
+                                                                                                   \
+  extern template void raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t>(              \
+    raft::device_resources const& handle,                                                          \
+    const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t>& index,               \
+    int_t k,                                                                                       \
+    const value_t* query,                                                                          \
+    int_t n_query_pts,                                                                             \
+    idx_t* inds,                                                                                   \
+    value_t* dists,                                                                                \
+    bool perform_post_filtering,                                                                   \
+    float weight);                                                                                 \
+                                                                                                   \
+  extern template void                                                                             \
+  raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t, matrix_idx_t>(                     \
+    raft::device_resources const& handle,                                                          \
+    const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index, \
+    raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,                        \
+    raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
+    raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,                              \
+    int_t k,                                                                                       \
+    bool perform_post_filtering,                                                                   \
+    float weight);
+
+instantiate_raft_neighbors_ball_cover(int64_t, float, uint32_t, uint32_t);
+
+#undef instantiate_raft_neighbors_ball_cover
diff --git a/cpp/include/raft/neighbors/ball_cover-inl.cuh b/cpp/include/raft/neighbors/ball_cover-inl.cuh
new file mode 100644
index 0000000000..619c57a35a
--- /dev/null
+++ b/cpp/include/raft/neighbors/ball_cover-inl.cuh
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __BALL_COVER_H
+#define __BALL_COVER_H
+
+#pragma once
+
+#include <cstdint>
+
+#include <raft/distance/distance_types.hpp>
+#include <raft/neighbors/ball_cover_types.hpp>
+#include <raft/spatial/knn/detail/ball_cover.cuh>
+#include <raft/spatial/knn/detail/ball_cover/common.cuh>
+#include <thrust/transform.h>
+
+namespace raft::neighbors::ball_cover {
+
+/**
+ * @defgroup random_ball_cover Random Ball Cover algorithm
+ * @{
+ */
+
+/**
+ * Builds and populates a previously unbuilt BallCoverIndex
+ *
+ * Usage example:
+ * @code{.cpp}
+ *
+ *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/neighbors/ball_cover.cuh>
+ *  #include <raft/distance/distance_types.hpp>
+ *  using namespace raft::neighbors;
+ *
+ *  raft::device_resources handle;
+ *  ...
+ *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
+ *  BallCoverIndex index(handle, X, metric);
+ *
+ *  ball_cover::build_index(handle, index);
+ * @endcode
+ * @note Supported metrics: Haversine, L2SqrtExpanded, L2SqrtUnexpanded; requires index.n <= 3.
+ * @tparam idx_t knn index type
+ * @tparam value_t knn value type
+ * @tparam int_t integral type for knn params
+ * @tparam matrix_idx_t matrix indexing type
+ * @param[in] handle library resource management handle
+ * @param[inout] index an empty (and not previously built) instance of BallCoverIndex
+ */
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void build_index(raft::device_resources const& handle,
+                 BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index)
+{
+  ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
+  if (index.metric == raft::distance::DistanceType::Haversine) {
+    raft::spatial::knn::detail::rbc_build_index(
+      handle, index, spatial::knn::detail::HaversineFunc<value_t, int_t>());
+  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
+             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
+    raft::spatial::knn::detail::rbc_build_index(
+      handle, index, spatial::knn::detail::EuclideanFunc<value_t, int_t>());
+  } else {
+    RAFT_FAIL("Metric not supported");
+  }
+
+  index.set_index_trained();
+}
+
+/** @} */  // end group random_ball_cover
+
+/**
+ * Performs a faster exact knn in metric spaces using the triangle
+ * inequality with a number of landmark points to reduce the
+ * number of distance computations from O(n^2) to O(sqrt(n)). This
+ * performs an all neighbors knn, which can reuse memory when
+ * the index and query are the same array. This function will
+ * build the index and assumes rbc_build_index() has not already
+ * been called.
+ * @tparam idx_t knn index type
+ * @tparam value_t knn distance type
+ * @tparam int_t type for integers, such as number of rows/cols
+ * @param[in] handle raft handle for resource management
+ * @param[inout] index ball cover index which has not yet been built
+ * @param[in] k number of nearest neighbors to find
+ * @param[in] perform_post_filtering if this is false, only the closest k landmarks
+ *                               are considered (which will return approximate
+ *                               results).
+ * @param[out] inds output knn indices
+ * @param[out] dists output knn distances
+ * @param[in] weight a weight for overlap between the closest landmark and
+ *               the radius of other landmarks when pruning distances.
+ *               Setting this value below 1 can effectively turn off
+ *               computing distances against many other balls, enabling
+ *               approximate nearest neighbors. Recall can be adjusted
+ *               based on how many relevant balls are ignored. Note that
+ *               many datasets can still have great recall even by only
+ *               looking in the closest landmark.
+ */
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void all_knn_query(raft::device_resources const& handle,
+                   BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
+                   int_t k,
+                   idx_t* inds,
+                   value_t* dists,
+                   bool perform_post_filtering = true,
+                   float weight                = 1.0)
+{
+  ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
+  if (index.metric == raft::distance::DistanceType::Haversine) {
+    raft::spatial::knn::detail::rbc_all_knn_query(
+      handle,
+      index,
+      k,
+      inds,
+      dists,
+      spatial::knn::detail::HaversineFunc<value_t, int_t>(),
+      perform_post_filtering,
+      weight);
+  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
+             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
+    raft::spatial::knn::detail::rbc_all_knn_query(
+      handle,
+      index,
+      k,
+      inds,
+      dists,
+      spatial::knn::detail::EuclideanFunc<value_t, int_t>(),
+      perform_post_filtering,
+      weight);
+  } else {
+    RAFT_FAIL("Metric not supported");
+  }
+
+  index.set_index_trained();
+}
+
+/**
+ * @ingroup random_ball_cover
+ * @{
+ */
+
+/**
+ * Performs a faster exact knn in metric spaces using the triangle
+ * inequality with a number of landmark points to reduce the
+ * number of distance computations from O(n^2) to O(sqrt(n)). This
+ * performs an all neighbors knn, which can reuse memory when
+ * the index and query are the same array. This function will
+ * build the index and assumes rbc_build_index() has not already
+ * been called.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *
+ *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/neighbors/ball_cover.cuh>
+ *  #include <raft/distance/distance_types.hpp>
+ *  using namespace raft::neighbors;
+ *
+ *  raft::device_resources handle;
+ *  ...
+ *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
+ *
+ *  // Construct a ball cover index
+ *  BallCoverIndex index(handle, X, metric);
+ *
+ *  // Perform all neighbors knn query
+ *  ball_cover::all_knn_query(handle, index, inds, dists, k);
+ * @endcode
+ *
+ * @tparam idx_t knn index type
+ * @tparam value_t knn distance type
+ * @tparam int_t type for integers, such as number of rows/cols
+ * @tparam matrix_idx_t matrix indexing type
+ *
+ * @param[in] handle raft handle for resource management
+ * @param[inout] index ball cover index which has not yet been built
+ * @param[out] inds output knn indices
+ * @param[out] dists output knn distances
+ * @param[in] k number of nearest neighbors to find
+ * @param[in] perform_post_filtering if this is false, only the closest k landmarks
+ *                               are considered (which will return approximate
+ *                               results).
+ * @param[in] weight a weight for overlap between the closest landmark and
+ *               the radius of other landmarks when pruning distances.
+ *               Setting this value below 1 can effectively turn off
+ *               computing distances against many other balls, enabling
+ *               approximate nearest neighbors. Recall can be adjusted
+ *               based on how many relevant balls are ignored. Note that
+ *               many datasets can still have great recall even by only
+ *               looking in the closest landmark.
+ */
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void all_knn_query(raft::device_resources const& handle,
+                   BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
+                   raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
+                   raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
+                   int_t k,
+                   bool perform_post_filtering = true,
+                   float weight                = 1.0)
+{
+  RAFT_EXPECTS(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
+  RAFT_EXPECTS(k <= index.m,
+               "k must be less than or equal to the number of data points in the index");
+  RAFT_EXPECTS(inds.extent(1) == dists.extent(1) && dists.extent(1) == static_cast<matrix_idx_t>(k),
+               "Number of columns in output indices and distances matrices must be equal to k");
+
+  RAFT_EXPECTS(inds.extent(0) == dists.extent(0) && dists.extent(0) == index.get_X().extent(0),
+               "Number of rows in output indices and distances matrices must equal number of rows "
+               "in index matrix.");
+
+  all_knn_query(
+    handle, index, k, inds.data_handle(), dists.data_handle(), perform_post_filtering, weight);
+}
+
+/** @} */
+
+/**
+ * Performs a faster exact knn in metric spaces using the triangle
+ * inequality with a number of landmark points to reduce the
+ * number of distance computations from O(n^2) to O(sqrt(n)). This
+ * function does not build the index and assumes build_index() has
+ * already been called. Use this function when the index and
+ * query arrays are different, otherwise use all_knn_query().
+ * @tparam idx_t index type
+ * @tparam value_t distances type
+ * @tparam int_t integer type for size info
+ * @param[in] handle raft handle for resource management
+ * @param[in] index ball cover index which has already been built (see build_index())
+ * @param[in] k number of nearest neighbors to find
+ * @param[in] query pointer to the query points to search for
+ * @param[in] n_query_pts number of query points
+ * @param[out] inds output knn indices
+ * @param[out] dists output knn distances
+ * @param[in] perform_post_filtering if this is false, only the closest k landmarks
+ *                               are considered (which will return approximate
+ *                               results).
+ * @param[in] weight a weight for overlap between the closest landmark and
+ *               the radius of other landmarks when pruning distances.
+ *               Setting this value below 1 can effectively turn off
+ *               computing distances against many other balls, enabling
+ *               approximate nearest neighbors. Recall can be adjusted
+ *               based on how many relevant balls are ignored. Note that
+ *               many datasets can still have great recall even by only
+ *               looking in the closest landmark.
+ */
+template <typename idx_t, typename value_t, typename int_t>
+void knn_query(raft::device_resources const& handle,
+               const BallCoverIndex<idx_t, value_t, int_t>& index,
+               int_t k,
+               const value_t* query,
+               int_t n_query_pts,
+               idx_t* inds,
+               value_t* dists,
+               bool perform_post_filtering = true,
+               float weight                = 1.0)
+{
+  ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
+  if (index.metric == raft::distance::DistanceType::Haversine) {
+    raft::spatial::knn::detail::rbc_knn_query(handle,
+                                              index,
+                                              k,
+                                              query,
+                                              n_query_pts,
+                                              inds,
+                                              dists,
+                                              spatial::knn::detail::HaversineFunc<value_t, int_t>(),
+                                              perform_post_filtering,
+                                              weight);
+  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
+             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
+    raft::spatial::knn::detail::rbc_knn_query(handle,
+                                              index,
+                                              k,
+                                              query,
+                                              n_query_pts,
+                                              inds,
+                                              dists,
+                                              spatial::knn::detail::EuclideanFunc<value_t, int_t>(),
+                                              perform_post_filtering,
+                                              weight);
+  } else {
+    RAFT_FAIL("Metric not supported");
+  }
+}
+
+/**
+ * @ingroup random_ball_cover
+ * @{
+ */
+
+/**
+ * Performs a faster exact knn in metric spaces using the triangle
+ * inequality with a number of landmark points to reduce the
+ * number of distance computations from O(n^2) to O(sqrt(n)). This
+ * function does not build the index and assumes build_index() has
+ * already been called. Use this function when the index and
+ * query arrays are different, otherwise use all_knn_query().
+ *
+ * Usage example:
+ * @code{.cpp}
+ *
+ *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/neighbors/ball_cover.cuh>
+ *  #include <raft/distance/distance_types.hpp>
+ *  using namespace raft::neighbors;
+ *
+ *  raft::device_resources handle;
+ *  ...
+ *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
+ *
+ *  // Build a ball cover index
+ *  BallCoverIndex index(handle, X, metric);
+ *  ball_cover::build_index(handle, index);
+ *
+ *  // Query the built index with a separate query matrix
+ *  ball_cover::knn_query(handle, index, query, inds, dists, k);
+ * @endcode
+ *
+ * @note Only the Haversine, L2SqrtExpanded and L2SqrtUnexpanded metrics are supported.
+ * @tparam idx_t index type
+ * @tparam value_t distances type
+ * @tparam int_t integer type for size info
+ * @tparam matrix_idx_t matrix indexing type
+ * @param[in] handle raft handle for resource management
+ * @param[in] index ball cover index which has already been built (see build_index())
+ * @param[in] query device matrix containing query data points
+ * @param[out] inds output knn indices (one row per query row, k columns)
+ * @param[out] dists output knn distances (one row per query row, k columns)
+ * @param[in] k number of nearest neighbors to find
+ * @param[in] perform_post_filtering if this is false, only the closest k landmarks
+ *                               are considered (which will return approximate
+ *                               results).
+ * @param[in] weight a weight for overlap between the closest landmark and
+ *               the radius of other landmarks when pruning distances.
+ *               Setting this value below 1 can effectively turn off
+ *               computing distances against many other balls, enabling
+ *               approximate nearest neighbors. Recall can be adjusted
+ *               based on how many relevant balls are ignored. Note that
+ *               many datasets can still have great recall even by only
+ *               looking in the closest landmark.
+ */
+template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
+void knn_query(raft::device_resources const& handle,
+               const BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
+               raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,
+               raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
+               raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
+               int_t k,
+               bool perform_post_filtering = true,
+               float weight                = 1.0)
+{
+  RAFT_EXPECTS(k <= index.m,
+               "k must be less than or equal to the number of data points in the index");
+  RAFT_EXPECTS(inds.extent(1) == dists.extent(1) && dists.extent(1) == static_cast<matrix_idx_t>(k),
+               "Number of columns in output indices and distances matrices must be equal to k");
+
+  RAFT_EXPECTS(inds.extent(0) == dists.extent(0) && dists.extent(0) == query.extent(0),
+               "Number of rows in output indices and distances matrices must equal number of rows "
+               "in search matrix.");
+
+  RAFT_EXPECTS(query.extent(1) == index.get_X().extent(1),
+               "Number of columns in query and index matrices must match.");
+  // Shapes validated: delegate to the pointer-based overload, which dispatches on index.metric.
+  knn_query(handle,
+            index,
+            k,
+            query.data_handle(),
+            static_cast<int_t>(query.extent(0)),  // explicit cast keeps int_t deduction unambiguous
+            inds.data_handle(),
+            dists.data_handle(),
+            perform_post_filtering,
+            weight);
+}
+
+/** @} */
+
+// TODO: implement functions for:
+//  4. rbc_eps_neigh() - given a populated index, perform query against different query array
+//  5. rbc_all_eps_neigh() - populate a BallCoverIndex and query against training data
+
+}  // namespace raft::neighbors::ball_cover
+
+#endif
diff --git a/cpp/include/raft/neighbors/ball_cover.cuh b/cpp/include/raft/neighbors/ball_cover.cuh
index 619c57a35a..41c5d0310c 100644
--- a/cpp/include/raft/neighbors/ball_cover.cuh
+++ b/cpp/include/raft/neighbors/ball_cover.cuh
@@ -13,383 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef __BALL_COVER_H
-#define __BALL_COVER_H
-
 #pragma once
 
-#include <cstdint>
-
-#include <raft/distance/distance_types.hpp>
-#include <raft/neighbors/ball_cover_types.hpp>
-#include <raft/spatial/knn/detail/ball_cover.cuh>
-#include <raft/spatial/knn/detail/ball_cover/common.cuh>
-#include <thrust/transform.h>
-
-namespace raft::neighbors::ball_cover {
-
-/**
- * @defgroup random_ball_cover Random Ball Cover algorithm
- * @{
- */
-
-/**
- * Builds and populates a previously unbuilt BallCoverIndex
- *
- * Usage example:
- * @code{.cpp}
- *
- *  #include <raft/core/device_resources.hpp>
- *  #include <raft/neighbors/ball_cover.cuh>
- *  #include <raft/distance/distance_types.hpp>
- *  using namespace raft::neighbors;
- *
- *  raft::raft::device_resources handle;
- *  ...
- *  auto metric = raft::distance::DistanceType::L2Expanded;
- *  BallCoverIndex index(handle, X, metric);
- *
- *  ball_cover::build_index(handle, index);
- * @endcode
- *
- * @tparam idx_t knn index type
- * @tparam value_t knn value type
- * @tparam int_t integral type for knn params
- * @tparam matrix_idx_t matrix indexing type
- * @param[in] handle library resource management handle
- * @param[inout] index an empty (and not previous built) instance of BallCoverIndex
- */
-template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void build_index(raft::device_resources const& handle,
-                 BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index)
-{
-  ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
-  if (index.metric == raft::distance::DistanceType::Haversine) {
-    raft::spatial::knn::detail::rbc_build_index(
-      handle, index, spatial::knn::detail::HaversineFunc<value_t, int_t>());
-  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
-             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
-    raft::spatial::knn::detail::rbc_build_index(
-      handle, index, spatial::knn::detail::EuclideanFunc<value_t, int_t>());
-  } else {
-    RAFT_FAIL("Metric not support");
-  }
-
-  index.set_index_trained();
-}
-
-/** @} */  // end group random_ball_cover
-
-/**
- * Performs a faster exact knn in metric spaces using the triangle
- * inequality with a number of landmark points to reduce the
- * number of distance computations from O(n^2) to O(sqrt(n)). This
- * performs an all neighbors knn, which can reuse memory when
- * the index and query are the same array. This function will
- * build the index and assumes rbc_build_index() has not already
- * been called.
- * @tparam idx_t knn index type
- * @tparam value_t knn distance type
- * @tparam int_t type for integers, such as number of rows/cols
- * @param[in] handle raft handle for resource management
- * @param[inout] index ball cover index which has not yet been built
- * @param[in] k number of nearest neighbors to find
- * @param[in] perform_post_filtering if this is false, only the closest k landmarks
- *                               are considered (which will return approximate
- *                               results).
- * @param[out] inds output knn indices
- * @param[out] dists output knn distances
- * @param[in] weight a weight for overlap between the closest landmark and
- *               the radius of other landmarks when pruning distances.
- *               Setting this value below 1 can effectively turn off
- *               computing distances against many other balls, enabling
- *               approximate nearest neighbors. Recall can be adjusted
- *               based on how many relevant balls are ignored. Note that
- *               many datasets can still have great recall even by only
- *               looking in the closest landmark.
- */
-template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void all_knn_query(raft::device_resources const& handle,
-                   BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
-                   int_t k,
-                   idx_t* inds,
-                   value_t* dists,
-                   bool perform_post_filtering = true,
-                   float weight                = 1.0)
-{
-  ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
-  if (index.metric == raft::distance::DistanceType::Haversine) {
-    raft::spatial::knn::detail::rbc_all_knn_query(
-      handle,
-      index,
-      k,
-      inds,
-      dists,
-      spatial::knn::detail::HaversineFunc<value_t, int_t>(),
-      perform_post_filtering,
-      weight);
-  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
-             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
-    raft::spatial::knn::detail::rbc_all_knn_query(
-      handle,
-      index,
-      k,
-      inds,
-      dists,
-      spatial::knn::detail::EuclideanFunc<value_t, int_t>(),
-      perform_post_filtering,
-      weight);
-  } else {
-    RAFT_FAIL("Metric not supported");
-  }
-
-  index.set_index_trained();
-}
-
-/**
- * @ingroup random_ball_cover
- * @{
- */
-
-/**
- * Performs a faster exact knn in metric spaces using the triangle
- * inequality with a number of landmark points to reduce the
- * number of distance computations from O(n^2) to O(sqrt(n)). This
- * performs an all neighbors knn, which can reuse memory when
- * the index and query are the same array. This function will
- * build the index and assumes rbc_build_index() has not already
- * been called.
- *
- * Usage example:
- * @code{.cpp}
- *
- *  #include <raft/core/device_resources.hpp>
- *  #include <raft/neighbors/ball_cover.cuh>
- *  #include <raft/distance/distance_types.hpp>
- *  using namespace raft::neighbors;
- *
- *  raft::raft::device_resources handle;
- *  ...
- *  auto metric = raft::distance::DistanceType::L2Expanded;
- *
- *  // Construct a ball cover index
- *  BallCoverIndex index(handle, X, metric);
- *
- *  // Perform all neighbors knn query
- *  ball_cover::all_knn_query(handle, index, inds, dists, k);
- * @endcode
- *
- * @tparam idx_t knn index type
- * @tparam value_t knn distance type
- * @tparam int_t type for integers, such as number of rows/cols
- * @tparam matrix_idx_t matrix indexing type
- *
- * @param[in] handle raft handle for resource management
- * @param[in] index ball cover index which has not yet been built
- * @param[out] inds output knn indices
- * @param[out] dists output knn distances
- * @param[in] k number of nearest neighbors to find
- * @param[in] perform_post_filtering if this is false, only the closest k landmarks
- *                               are considered (which will return approximate
- *                               results).
- * @param[in] weight a weight for overlap between the closest landmark and
- *               the radius of other landmarks when pruning distances.
- *               Setting this value below 1 can effectively turn off
- *               computing distances against many other balls, enabling
- *               approximate nearest neighbors. Recall can be adjusted
- *               based on how many relevant balls are ignored. Note that
- *               many datasets can still have great recall even by only
- *               looking in the closest landmark.
- */
-template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void all_knn_query(raft::device_resources const& handle,
-                   BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
-                   raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
-                   raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
-                   int_t k,
-                   bool perform_post_filtering = true,
-                   float weight                = 1.0)
-{
-  RAFT_EXPECTS(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
-  RAFT_EXPECTS(k <= index.m,
-               "k must be less than or equal to the number of data points in the index");
-  RAFT_EXPECTS(inds.extent(1) == dists.extent(1) && dists.extent(1) == static_cast<matrix_idx_t>(k),
-               "Number of columns in output indices and distances matrices must be equal to k");
-
-  RAFT_EXPECTS(inds.extent(0) == dists.extent(0) && dists.extent(0) == index.get_X().extent(0),
-               "Number of rows in output indices and distances matrices must equal number of rows "
-               "in index matrix.");
-
-  all_knn_query(
-    handle, index, k, inds.data_handle(), dists.data_handle(), perform_post_filtering, weight);
-}
-
-/** @} */
-
-/**
- * Performs a faster exact knn in metric spaces using the triangle
- * inequality with a number of landmark points to reduce the
- * number of distance computations from O(n^2) to O(sqrt(n)). This
- * function does not build the index and assumes rbc_build_index() has
- * already been called. Use this function when the index and
- * query arrays are different, otherwise use rbc_all_knn_query().
- * @tparam idx_t index type
- * @tparam value_t distances type
- * @tparam int_t integer type for size info
- * @param[in] handle raft handle for resource management
- * @param[inout] index ball cover index which has not yet been built
- * @param[in] k number of nearest neighbors to find
- * @param[in] query the
- * @param[in] perform_post_filtering if this is false, only the closest k landmarks
- *                               are considered (which will return approximate
- *                               results).
- * @param[out] inds output knn indices
- * @param[out] dists output knn distances
- * @param[in] weight a weight for overlap between the closest landmark and
- *               the radius of other landmarks when pruning distances.
- *               Setting this value below 1 can effectively turn off
- *               computing distances against many other balls, enabling
- *               approximate nearest neighbors. Recall can be adjusted
- *               based on how many relevant balls are ignored. Note that
- *               many datasets can still have great recall even by only
- *               looking in the closest landmark.
- * @param[in] n_query_pts number of query points
- */
-template <typename idx_t, typename value_t, typename int_t>
-void knn_query(raft::device_resources const& handle,
-               const BallCoverIndex<idx_t, value_t, int_t>& index,
-               int_t k,
-               const value_t* query,
-               int_t n_query_pts,
-               idx_t* inds,
-               value_t* dists,
-               bool perform_post_filtering = true,
-               float weight                = 1.0)
-{
-  ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
-  if (index.metric == raft::distance::DistanceType::Haversine) {
-    raft::spatial::knn::detail::rbc_knn_query(handle,
-                                              index,
-                                              k,
-                                              query,
-                                              n_query_pts,
-                                              inds,
-                                              dists,
-                                              spatial::knn::detail::HaversineFunc<value_t, int_t>(),
-                                              perform_post_filtering,
-                                              weight);
-  } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded ||
-             index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) {
-    raft::spatial::knn::detail::rbc_knn_query(handle,
-                                              index,
-                                              k,
-                                              query,
-                                              n_query_pts,
-                                              inds,
-                                              dists,
-                                              spatial::knn::detail::EuclideanFunc<value_t, int_t>(),
-                                              perform_post_filtering,
-                                              weight);
-  } else {
-    RAFT_FAIL("Metric not supported");
-  }
-}
-
-/**
- * @ingroup random_ball_cover
- * @{
- */
-
-/**
- * Performs a faster exact knn in metric spaces using the triangle
- * inequality with a number of landmark points to reduce the
- * number of distance computations from O(n^2) to O(sqrt(n)). This
- * function does not build the index and assumes rbc_build_index() has
- * already been called. Use this function when the index and
- * query arrays are different, otherwise use rbc_all_knn_query().
- *
- * Usage example:
- * @code{.cpp}
- *
- *  #include <raft/core/device_resources.hpp>
- *  #include <raft/neighbors/ball_cover.cuh>
- *  #include <raft/distance/distance_types.hpp>
- *  using namespace raft::neighbors;
- *
- *  raft::raft::device_resources handle;
- *  ...
- *  auto metric = raft::distance::DistanceType::L2Expanded;
- *
- *  // Build a ball cover index
- *  BallCoverIndex index(handle, X, metric);
- *  ball_cover::build_index(handle, index);
- *
- *  // Perform all neighbors knn query
- *  ball_cover::knn_query(handle, index, inds, dists, k);
- * @endcode
-
- *
- * @tparam idx_t index type
- * @tparam value_t distances type
- * @tparam int_t integer type for size info
- * @tparam matrix_idx_t
- * @param[in] handle raft handle for resource management
- * @param[in] index ball cover index which has not yet been built
- * @param[in] query device matrix containing query data points
- * @param[out] inds output knn indices
- * @param[out] dists output knn distances
- * @param[in] k number of nearest neighbors to find
- * @param[in] perform_post_filtering if this is false, only the closest k landmarks
- *                               are considered (which will return approximate
- *                               results).
- * @param[in] weight a weight for overlap between the closest landmark and
- *               the radius of other landmarks when pruning distances.
- *               Setting this value below 1 can effectively turn off
- *               computing distances against many other balls, enabling
- *               approximate nearest neighbors. Recall can be adjusted
- *               based on how many relevant balls are ignored. Note that
- *               many datasets can still have great recall even by only
- *               looking in the closest landmark.
- */
-template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void knn_query(raft::device_resources const& handle,
-               const BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
-               raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,
-               raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
-               raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
-               int_t k,
-               bool perform_post_filtering = true,
-               float weight                = 1.0)
-{
-  RAFT_EXPECTS(k <= index.m,
-               "k must be less than or equal to the number of data points in the index");
-  RAFT_EXPECTS(inds.extent(1) == dists.extent(1) && dists.extent(1) == static_cast<idx_t>(k),
-               "Number of columns in output indices and distances matrices must be equal to k");
-
-  RAFT_EXPECTS(inds.extent(0) == dists.extent(0) && dists.extent(0) == query.extent(0),
-               "Number of rows in output indices and distances matrices must equal number of rows "
-               "in search matrix.");
-
-  RAFT_EXPECTS(query.extent(1) == index.get_X().extent(1),
-               "Number of columns in query and index matrices must match.");
-
-  knn_query(handle,
-            index,
-            k,
-            query.data_handle(),
-            query.extent(0),
-            inds.data_handle(),
-            dists.data_handle(),
-            perform_post_filtering,
-            weight);
-}
-
-/** @} */
-
-// TODO: implement functions for:
-//  4. rbc_eps_neigh() - given a populated index, perform query against different query array
-//  5. rbc_all_eps_neigh() - populate a BallCoverIndex and query against training data
-
-}  // namespace raft::neighbors::ball_cover
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "ball_cover-inl.cuh"
+#endif
 
+#ifdef RAFT_COMPILED
+#include "ball_cover-ext.cuh"
 #endif
diff --git a/cpp/include/raft/neighbors/brute_force-ext.cuh b/cpp/include/raft/neighbors/brute_force-ext.cuh
new file mode 100644
index 0000000000..98a186db86
--- /dev/null
+++ b/cpp/include/raft/neighbors/brute_force-ext.cuh
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/device_mdspan.hpp>       // raft::device_matrix_view
+#include <raft/core/device_resources.hpp>    // raft::device_resources
+#include <raft/core/operators.hpp>           // raft::identity_op
+#include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
+#include <raft/util/raft_explicit.hpp>       // RAFT_EXPLICIT
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::neighbors::brute_force {
+
+template <typename value_t, typename idx_t>
+inline void knn_merge_parts(
+  raft::device_resources const& handle,
+  raft::device_matrix_view<const value_t, idx_t, row_major> in_keys,
+  raft::device_matrix_view<const idx_t, idx_t, row_major> in_values,
+  raft::device_matrix_view<value_t, idx_t, row_major> out_keys,
+  raft::device_matrix_view<idx_t, idx_t, row_major> out_values,
+  size_t n_samples,
+  std::optional<raft::device_vector_view<idx_t, idx_t>> translations = std::nullopt) RAFT_EXPLICIT;
+
+template <typename idx_t,
+          typename value_t,
+          typename matrix_idx,
+          typename index_layout,
+          typename search_layout,
+          typename epilogue_op = raft::identity_op>
+void knn(raft::device_resources const& handle,
+         std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index,
+         raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,
+         raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
+         raft::device_matrix_view<value_t, matrix_idx, row_major> distances,
+         distance::DistanceType metric         = distance::DistanceType::L2Unexpanded,
+         std::optional<float> metric_arg       = std::make_optional<float>(2.0f),
+         std::optional<idx_t> global_id_offset = std::nullopt,
+         epilogue_op distance_epilogue         = raft::identity_op()) RAFT_EXPLICIT;
+
+template <typename value_t, typename idx_t, typename idx_layout, typename query_layout>
+void fused_l2_knn(raft::device_resources const& handle,
+                  raft::device_matrix_view<const value_t, idx_t, idx_layout> index,
+                  raft::device_matrix_view<const value_t, idx_t, query_layout> query,
+                  raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,
+                  raft::device_matrix_view<value_t, idx_t, row_major> out_dists,
+                  raft::distance::DistanceType metric) RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors::brute_force
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+// No extern template for raft::neighbors::brute_force::knn_merge_parts
+
+#define instantiate_raft_neighbors_brute_force_knn(                                         \
+  idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
+  extern template void raft::neighbors::brute_force::                                       \
+    knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
+      raft::device_resources const& handle,                                                 \
+      std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
+      raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
+      raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
+      raft::device_matrix_view<value_t, matrix_idx, row_major> distances,                   \
+      raft::distance::DistanceType metric,                                                  \
+      std::optional<float> metric_arg,                                                      \
+      std::optional<idx_t> global_id_offset,                                                \
+      epilogue_op distance_epilogue)
+// The macro has no trailing ';' (each use supplies it). Type combinations declared extern:
+instantiate_raft_neighbors_brute_force_knn(
+  int64_t, float, uint32_t, raft::row_major, raft::row_major, raft::identity_op);
+instantiate_raft_neighbors_brute_force_knn(
+  int64_t, float, int64_t, raft::row_major, raft::row_major, raft::identity_op);
+instantiate_raft_neighbors_brute_force_knn(
+  int, float, int, raft::row_major, raft::row_major, raft::identity_op);
+instantiate_raft_neighbors_brute_force_knn(
+  uint32_t, float, uint32_t, raft::row_major, raft::row_major, raft::identity_op);
+
+#undef instantiate_raft_neighbors_brute_force_knn
+
+// Declares fused_l2_knn extern for one type combination. Template arguments are not spelled out;
+// they are deduced from the parameter types. Call sites supply the terminating ';'.
+#define instantiate_raft_neighbors_brute_force_fused_l2_knn(            \
+  value_t, idx_t, idx_layout, query_layout)                             \
+  extern template void raft::neighbors::brute_force::fused_l2_knn(      \
+    raft::device_resources const& handle,                               \
+    raft::device_matrix_view<const value_t, idx_t, idx_layout> index,   \
+    raft::device_matrix_view<const value_t, idx_t, query_layout> query, \
+    raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,         \
+    raft::device_matrix_view<value_t, idx_t, row_major> out_dists,      \
+    raft::distance::DistanceType metric)
+
+instantiate_raft_neighbors_brute_force_fused_l2_knn(
+  float, int64_t, raft::row_major, raft::row_major);
+
+#undef instantiate_raft_neighbors_brute_force_fused_l2_knn
diff --git a/cpp/include/raft/neighbors/brute_force-inl.cuh b/cpp/include/raft/neighbors/brute_force-inl.cuh
new file mode 100644
index 0000000000..dac1a29c7f
--- /dev/null
+++ b/cpp/include/raft/neighbors/brute_force-inl.cuh
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/distance/distance_types.hpp>
+#include <raft/neighbors/detail/knn_brute_force.cuh>
+#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
+
+namespace raft::neighbors::brute_force {
+
+/**
+ * @defgroup brute_force_knn Brute-force K-Nearest Neighbors
+ * @{
+ */
+
+/**
+ * @brief Performs a k-select across several (contiguous) row-partitioned index/distance
+ * matrices formatted like the following:
+ *
+ * part1row1: k0, k1, k2, k3
+ * part1row2: k0, k1, k2, k3
+ * part1row3: k0, k1, k2, k3
+ * part2row1: k0, k1, k2, k3
+ * part2row2: k0, k1, k2, k3
+ * part2row3: k0, k1, k2, k3
+ * etc...
+ *
+ * The example shows an aggregated index/distance matrix with two partitions, n_samples=3, k=4.
+ *
+ * When working with extremely large data sets that have been broken over multiple indexes,
+ * such as when computing over multiple GPUs, the ids will often start at 0 for each local knn
+ * index but the global ids need to be used when merging them together. An optional translations
+ * vector can be supplied to map the starting id of each partition to its global id so that the
+ * final merged knn is based on the global ids.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/neighbors/brute_force.cuh>
+ *  using namespace raft::neighbors;
+ *
+ *  raft::device_resources handle;
+ *  ...
+ *  compute multiple knn graphs and aggregate row-wise
+ *  (see detailed description above)
+ *  ...
+ *  brute_force::knn_merge_parts(handle, in_keys, in_values, out_keys, out_values, n_samples);
+ * @endcode
+ *
+ * @tparam value_t element type of the key matrices
+ * @tparam idx_t element type of the value matrices; also the index type of every view
+ * @param[in] handle raft resources; the merge is issued on the stream of this handle
+ * @param[in] in_keys matrix of input keys (size n_samples * n_parts * k)
+ * @param[in] in_values matrix of input values (size n_samples * n_parts * k)
+ * @param[out] out_keys matrix of output keys (size n_samples * k)
+ * @param[out] out_values matrix of output values (size n_samples * k)
+ * @param[in] n_samples number of rows in each partition
+ * @param[in] translations optional vector of starting global id mappings for each local partition
+ */
+template <typename value_t, typename idx_t>
+inline void knn_merge_parts(
+  raft::device_resources const& handle,
+  raft::device_matrix_view<const value_t, idx_t, row_major> in_keys,
+  raft::device_matrix_view<const idx_t, idx_t, row_major> in_values,
+  raft::device_matrix_view<value_t, idx_t, row_major> out_keys,
+  raft::device_matrix_view<idx_t, idx_t, row_major> out_values,
+  size_t n_samples,
+  std::optional<raft::device_vector_view<idx_t, idx_t>> translations = std::nullopt)
+{
+  RAFT_EXPECTS(in_keys.extent(1) == in_values.extent(1) && in_keys.extent(0) == in_values.extent(0),
+               "in_keys and in_values must have the same shape.");
+  // Note: `a == b == c` would compare the bool `a == b` against `c`; test each pair explicitly.
+  RAFT_EXPECTS(
+    out_keys.extent(0) == out_values.extent(0) && out_keys.extent(0) == n_samples,
+    "Number of rows in output keys and val matrices must equal number of rows in search matrix.");
+  RAFT_EXPECTS(
+    out_keys.extent(1) == out_values.extent(1) && out_keys.extent(1) == in_keys.extent(1),
+    "Number of columns in output indices and distances matrices must be equal to k");
+
+  // Unwrap the optional view to a raw pointer; nullptr when no translations were supplied.
+  idx_t* translations_ptr = translations.has_value() ? translations->data_handle() : nullptr;
+  auto n_parts = in_keys.extent(0) / n_samples;
+  detail::knn_merge_parts(in_keys.data_handle(),
+                          in_values.data_handle(),
+                          out_keys.data_handle(),
+                          out_values.data_handle(),
+                          n_samples,
+                          n_parts,
+                          in_keys.extent(1),
+                          handle.get_stream(),
+                          translations_ptr);
+}
+
+/**
+ * @brief Flat C++ API function to perform a brute force knn on a series of input arrays and
+ * combine the results into a single output array for indexes and distances. Inputs can be either
+ * row- or column-major but the output matrices will always be in row-major format.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/neighbors/brute_force.cuh>
+ *  #include <raft/distance/distance_types.hpp>
+ *  using namespace raft::neighbors;
+ *
+ *  raft::device_resources handle;
+ *  ...
+ *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
+ *  brute_force::knn(handle, index, search, indices, distances, metric);
+ * @endcode
+ *
+ * @tparam idx_t type of the output neighbor indices
+ * @tparam value_t type of the input data and of the output distances
+ * @tparam matrix_idx index type of the matrix views
+ * @tparam index_layout layout of the index matrices
+ * @tparam search_layout layout of the search matrix
+ * @tparam epilogue_op type of the distance epilogue functor
+ * @param[in] handle: the raft handle to use
+ * @param[in] index: non-empty vector of device matrices (each size m_i*d) used as the knn index
+ * @param[in] search: matrix (size n*d) to be used for searching the index
+ * @param[out] indices: matrix (size n*k) to store output knn indices
+ * @param[out] distances: matrix (size n*k) to store the output knn distance
+ * @param[in] metric: distance metric to use. Euclidean (L2) is used by default
+ * @param[in] metric_arg: the value of `p` for Minkowski (l-p) distances. This
+ *                        is ignored if the metric_type is not Minkowski.
+ * @param[in] global_id_offset: optional starting global id mapping for the local partition
+ *                              (assumes the index contains contiguous ids in the global id space)
+ * @param[in] distance_epilogue: optional epilogue function to run after computing distances. This
+ *     function takes a triple of the (value, rowid, colid) for each element in the pairwise
+ *     distances and returns a transformed value back.
+ */
+template <typename idx_t,
+          typename value_t,
+          typename matrix_idx,
+          typename index_layout,
+          typename search_layout,
+          typename epilogue_op = raft::identity_op>
+void knn(raft::device_resources const& handle,
+         std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index,
+         raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,
+         raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
+         raft::device_matrix_view<value_t, matrix_idx, row_major> distances,
+         distance::DistanceType metric         = distance::DistanceType::L2Unexpanded,
+         std::optional<float> metric_arg       = std::make_optional<float>(2.0f),
+         std::optional<idx_t> global_id_offset = std::nullopt,
+         epilogue_op distance_epilogue         = raft::identity_op())
+{
+  RAFT_EXPECTS(!index.empty(), "At least one index matrix must be provided");  // index[0] is read
+  RAFT_EXPECTS(index[0].extent(1) == search.extent(1),
+               "Number of dimensions for both index and search matrices must be equal");
+  RAFT_EXPECTS(indices.extent(0) == distances.extent(0) && distances.extent(0) == search.extent(0),
+               "Number of rows in output indices and distances matrices must equal number of rows "
+               "in search matrix.");
+  RAFT_EXPECTS(indices.extent(1) == distances.extent(1) && distances.extent(1) > 0,
+               "Output indices and distances must have the same non-zero number of columns");
+
+  constexpr bool rowMajorIndex = std::is_same_v<index_layout, layout_c_contiguous>;
+  constexpr bool rowMajorQuery = std::is_same_v<search_layout, layout_c_contiguous>;
+
+  std::vector<value_t*> inputs;
+  std::vector<matrix_idx> sizes;
+  for (std::size_t i = 0; i < index.size(); ++i) {
+    inputs.push_back(const_cast<value_t*>(index[i].data_handle()));
+    sizes.push_back(index[i].extent(0));
+  }
+  std::vector<idx_t> trans;  // stays empty unless a global id offset was supplied
+  if (global_id_offset.has_value()) { trans.push_back(global_id_offset.value()); }
+
+  raft::neighbors::detail::brute_force_knn_impl(handle,
+                                                inputs,
+                                                sizes,
+                                                index[0].extent(1),
+                                                // TODO: This is unfortunate. Need to fix.
+                                                const_cast<value_t*>(search.data_handle()),
+                                                search.extent(0),
+                                                indices.data_handle(),
+                                                distances.data_handle(),
+                                                indices.extent(1),
+                                                rowMajorIndex,
+                                                rowMajorQuery,
+                                                trans.empty() ? nullptr : &trans,  // no offset
+                                                metric,
+                                                metric_arg.value_or(2.0f),
+                                                distance_epilogue);
+}
+
+/**
+ * @brief Compute the k-nearest neighbors using L2 expanded/unexpanded distance.
+ *
+ * This is a specialized function for fusing the k-selection with the distance
+ * computation when k <= 64. The value of k will be inferred from the number
+ * of columns in the output matrices.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/neighbors/brute_force.cuh>
+ *  #include <raft/distance/distance_types.hpp>
+ *  using namespace raft::neighbors;
+ *
+ *  raft::device_resources handle;
+ *  ...
+ *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
+ *  brute_force::fused_l2_knn(handle, index, search, indices, distances, metric);
+ * @endcode
+ *
+ * @tparam value_t type of values
+ * @tparam idx_t type of indices
+ * @tparam idx_layout layout type of index matrix
+ * @tparam query_layout layout type of query matrix
+ * @param[in] handle raft handle for sharing expensive resources
+ * @param[in] index input index array on device (size m * d)
+ * @param[in] query input query array on device (size n * d)
+ * @param[out] out_inds output indices array on device (size n * k)
+ * @param[out] out_dists output dists array on device (size n * k)
+ * @param[in] metric type of distance computation to perform (must be a variant of L2)
+ */
+template <typename value_t, typename idx_t, typename idx_layout, typename query_layout>
+void fused_l2_knn(raft::device_resources const& handle,
+                  raft::device_matrix_view<const value_t, idx_t, idx_layout> index,
+                  raft::device_matrix_view<const value_t, idx_t, query_layout> query,
+                  raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,
+                  raft::device_matrix_view<value_t, idx_t, row_major> out_dists,
+                  raft::distance::DistanceType metric)
+{
+  // k is not a parameter: it is taken from the number of columns of the output matrices.
+  int k = static_cast<int>(out_inds.extent(1));
+  RAFT_EXPECTS(k <= 64, "For fused k-selection, k must be <= 64");
+  RAFT_EXPECTS(out_inds.extent(1) == out_dists.extent(1), "Value of k must match for outputs");
+  RAFT_EXPECTS(index.extent(1) == query.extent(1),
+               "Number of columns in input matrices must be the same.");
+
+  RAFT_EXPECTS(metric == distance::DistanceType::L2Expanded ||
+                 metric == distance::DistanceType::L2Unexpanded ||
+                 metric == distance::DistanceType::L2SqrtUnexpanded ||
+                 metric == distance::DistanceType::L2SqrtExpanded,
+               "Distance metric must be L2");
+
+  size_t n_index_rows = index.extent(0);
+  size_t n_query_rows = query.extent(0);
+  size_t D            = index.extent(1);
+
+  RAFT_EXPECTS(raft::is_row_or_column_major(index), "Index must be row or column major layout");
+  RAFT_EXPECTS(raft::is_row_or_column_major(query), "Query must be row or column major layout");
+
+  const bool rowMajorIndex = raft::is_row_major(index);
+  const bool rowMajorQuery = raft::is_row_major(query);
+
+  raft::spatial::knn::detail::fusedL2Knn(D,
+                                         out_inds.data_handle(),
+                                         out_dists.data_handle(),
+                                         index.data_handle(),
+                                         query.data_handle(),
+                                         n_index_rows,
+                                         n_query_rows,
+                                         k,
+                                         rowMajorIndex,
+                                         rowMajorQuery,
+                                         handle.get_stream(),
+                                         metric);
+}
+
+/** @} */  // end group brute_force_knn
+
+}  // namespace raft::neighbors::brute_force
diff --git a/cpp/include/raft/neighbors/brute_force.cuh b/cpp/include/raft/neighbors/brute_force.cuh
index dac1a29c7f..6cebf4b52a 100644
--- a/cpp/include/raft/neighbors/brute_force.cuh
+++ b/cpp/include/raft/neighbors/brute_force.cuh
@@ -13,268 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/core/device_mdspan.hpp>
-#include <raft/distance/distance_types.hpp>
-#include <raft/neighbors/detail/knn_brute_force.cuh>
-#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
-
-namespace raft::neighbors::brute_force {
-
-/**
- * @defgroup brute_force_knn Brute-force K-Nearest Neighbors
- * @{
- */
-
-/**
- * @brief Performs a k-select across several (contiguous) row-partitioned index/distance
- * matrices formatted like the following:
- *
- * part1row1: k0, k1, k2, k3
- * part1row2: k0, k1, k2, k3
- * part1row3: k0, k1, k2, k3
- * part2row1: k0, k1, k2, k3
- * part2row2: k0, k1, k2, k3
- * part2row3: k0, k1, k2, k3
- * etc...
- *
- * The example above shows what an aggregated index/distance matrix
- * would look like with two partitions when n_samples=3 and k=4.
- *
- * When working with extremely large data sets that have been broken
- * over multiple indexes, such as when computing over multiple GPUs,
- * the ids will often start at 0 for each local knn index but the
- * global ids need to be used when merging them together. An optional
- * translations vector can be supplied to map the starting id of
- * each partition to its global id so that the final merged knn
- * is based on the global ids.
- *
- * Usage example:
- * @code{.cpp}
- *  #include <raft/core/device_resources.hpp>
- *  #include <raft/neighbors/brute_force.cuh>
- *  using namespace raft::neighbors;
- *
- *  raft::raft::device_resources handle;
- *  ...
- *  compute multiple knn graphs and aggregate row-wise
- *  (see detailed description above)
- *  ...
- *  brute_force::knn_merge_parts(handle, in_keys, in_values, out_keys, out_values, n_samples);
- * @endcode
- *
- * @tparam idx_t
- * @tparam value_t
- *
- * @param[in] handle
- * @param[in] in_keys matrix of input keys (size n_samples * n_parts * k)
- * @param[in] in_values matrix of input values (size n_samples * n_parts * k)
- * @param[out] out_keys matrix of output keys (size n_samples * k)
- * @param[out] out_values matrix of output values (size n_samples * k)
- * @param[in] n_samples number of rows in each partition
- * @param[in] translations optional vector of starting global id mappings for each local partition
- */
-template <typename value_t, typename idx_t>
-inline void knn_merge_parts(
-  raft::device_resources const& handle,
-  raft::device_matrix_view<const value_t, idx_t, row_major> in_keys,
-  raft::device_matrix_view<const idx_t, idx_t, row_major> in_values,
-  raft::device_matrix_view<value_t, idx_t, row_major> out_keys,
-  raft::device_matrix_view<idx_t, idx_t, row_major> out_values,
-  size_t n_samples,
-  std::optional<raft::device_vector_view<idx_t, idx_t>> translations = std::nullopt)
-{
-  RAFT_EXPECTS(in_keys.extent(1) == in_values.extent(1) && in_keys.extent(0) == in_values.extent(0),
-               "in_keys and in_values must have the same shape.");
-  RAFT_EXPECTS(
-    out_keys.extent(0) == out_values.extent(0) == n_samples,
-    "Number of rows in output keys and val matrices must equal number of rows in search matrix.");
-  RAFT_EXPECTS(out_keys.extent(1) == out_values.extent(1) == in_keys.extent(1),
-               "Number of columns in output indices and distances matrices must be equal to k");
-
-  auto n_parts = in_keys.extent(0) / n_samples;
-  detail::knn_merge_parts(in_keys.data_handle(),
-                          in_values.data_handle(),
-                          out_keys.data_handle(),
-                          out_values.data_handle(),
-                          n_samples,
-                          n_parts,
-                          in_keys.extent(1),
-                          handle.get_stream(),
-                          translations.value_or(nullptr));
-}
-
-/**
- * @brief Flat C++ API function to perform a brute force knn on
- * a series of input arrays and combine the results into a single
- * output array for indexes and distances. Inputs can be either
- * row- or column-major but the output matrices will always be in
- * row-major format.
- *
- * Usage example:
- * @code{.cpp}
- *  #include <raft/core/device_resources.hpp>
- *  #include <raft/neighbors/brute_force.cuh>
- *  #include <raft/distance/distance_types.hpp>
- *  using namespace raft::neighbors;
- *
- *  raft::raft::device_resources handle;
- *  ...
- *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
- *  brute_force::knn(handle, index, search, indices, distances, metric);
- * @endcode
- *
- * @param[in] handle: the cuml handle to use
- * @param[in] index: vector of device matrices (each size m_i*d) to be used as the knn index
- * @param[in] search: matrix (size n*d) to be used for searching the index
- * @param[out] indices: matrix (size n*k) to store output knn indices
- * @param[out] distances: matrix (size n*k) to store the output knn distance
- * @param[in] metric: distance metric to use. Euclidean (L2) is used by default
- * @param[in] metric_arg: the value of `p` for Minkowski (l-p) distances. This
- * 					 is ignored if the metric_type is not Minkowski.
- * @param[in] global_id_offset: optional starting global id mapping for the local partition
- *                              (assumes the index contains contiguous ids in the global id space)
- * @param[in] distance_epilogue: optional epilogue function to run after computing distances. This
-                                 function takes a triple of the (value, rowid, colid) for each
-                                 element in the pairwise distances and returns a transformed value
-                                 back.
- */
-template <typename idx_t,
-          typename value_t,
-          typename matrix_idx,
-          typename index_layout,
-          typename search_layout,
-          typename epilogue_op = raft::identity_op>
-void knn(raft::device_resources const& handle,
-         std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index,
-         raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,
-         raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
-         raft::device_matrix_view<value_t, matrix_idx, row_major> distances,
-         distance::DistanceType metric         = distance::DistanceType::L2Unexpanded,
-         std::optional<float> metric_arg       = std::make_optional<float>(2.0f),
-         std::optional<idx_t> global_id_offset = std::nullopt,
-         epilogue_op distance_epilogue         = raft::identity_op())
-{
-  RAFT_EXPECTS(index[0].extent(1) == search.extent(1),
-               "Number of dimensions for both index and search matrices must be equal");
-
-  RAFT_EXPECTS(indices.extent(0) == distances.extent(0) && distances.extent(0) == search.extent(0),
-               "Number of rows in output indices and distances matrices must equal number of rows "
-               "in search matrix.");
-  RAFT_EXPECTS(indices.extent(1) == distances.extent(1) && distances.extent(1),
-               "Number of columns in output indices and distances matrices must the same");
-
-  bool rowMajorIndex = std::is_same_v<index_layout, layout_c_contiguous>;
-  bool rowMajorQuery = std::is_same_v<search_layout, layout_c_contiguous>;
-
-  std::vector<value_t*> inputs;
-  std::vector<matrix_idx> sizes;
-  for (std::size_t i = 0; i < index.size(); ++i) {
-    inputs.push_back(const_cast<value_t*>(index[i].data_handle()));
-    sizes.push_back(index[i].extent(0));
-  }
-
-  std::vector<idx_t> trans;
-  if (global_id_offset.has_value()) { trans.push_back(global_id_offset.value()); }
-
-  std::vector<idx_t>* trans_arg = global_id_offset.has_value() ? &trans : nullptr;
-
-  raft::neighbors::detail::brute_force_knn_impl(handle,
-                                                inputs,
-                                                sizes,
-                                                index[0].extent(1),
-                                                // TODO: This is unfortunate. Need to fix.
-                                                const_cast<value_t*>(search.data_handle()),
-                                                search.extent(0),
-                                                indices.data_handle(),
-                                                distances.data_handle(),
-                                                indices.extent(1),
-                                                rowMajorIndex,
-                                                rowMajorQuery,
-                                                trans_arg,
-                                                metric,
-                                                metric_arg.value_or(2.0f),
-                                                distance_epilogue);
-}
-
-/**
- * @brief Compute the k-nearest neighbors using L2 expanded/unexpanded distance.
- *
- * This is a specialized function for fusing the k-selection with the distance
- * computation when k < 64. The value of k will be inferred from the number
- * of columns in the output matrices.
- *
- * Usage example:
- * @code{.cpp}
- *  #include <raft/core/device_resources.hpp>
- *  #include <raft/neighbors/brute_force.cuh>
- *  #include <raft/distance/distance_types.hpp>
- *  using namespace raft::neighbors;
- *
- *  raft::raft::device_resources handle;
- *  ...
- *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
- *  brute_force::fused_l2_knn(handle, index, search, indices, distances, metric);
- * @endcode
-
- * @tparam value_t type of values
- * @tparam idx_t type of indices
- * @tparam idx_layout layout type of index matrix
- * @tparam query_layout layout type of query matrix
- * @param[in] handle raft handle for sharing expensive resources
- * @param[in] index input index array on device (size m * d)
- * @param[in] query input query array on device (size n * d)
- * @param[out] out_inds output indices array on device (size n * k)
- * @param[out] out_dists output dists array on device (size n * k)
- * @param[in] metric type of distance computation to perform (must be a variant of L2)
- */
-template <typename value_t, typename idx_t, typename idx_layout, typename query_layout>
-void fused_l2_knn(raft::device_resources const& handle,
-                  raft::device_matrix_view<const value_t, idx_t, idx_layout> index,
-                  raft::device_matrix_view<const value_t, idx_t, query_layout> query,
-                  raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,
-                  raft::device_matrix_view<value_t, idx_t, row_major> out_dists,
-                  raft::distance::DistanceType metric)
-{
-  int k = static_cast<int>(out_inds.extent(1));
-
-  RAFT_EXPECTS(k <= 64, "For fused k-selection, k must be < 64");
-  RAFT_EXPECTS(out_inds.extent(1) == out_dists.extent(1), "Value of k must match for outputs");
-  RAFT_EXPECTS(index.extent(1) == query.extent(1),
-               "Number of columns in input matrices must be the same.");
-
-  RAFT_EXPECTS(metric == distance::DistanceType::L2Expanded ||
-                 metric == distance::DistanceType::L2Unexpanded ||
-                 metric == distance::DistanceType::L2SqrtUnexpanded ||
-                 metric == distance::DistanceType::L2SqrtExpanded,
-               "Distance metric must be L2");
-
-  size_t n_index_rows = index.extent(0);
-  size_t n_query_rows = query.extent(0);
-  size_t D            = index.extent(1);
-
-  RAFT_EXPECTS(raft::is_row_or_column_major(index), "Index must be row or column major layout");
-  RAFT_EXPECTS(raft::is_row_or_column_major(query), "Query must be row or column major layout");
-
-  const bool rowMajorIndex = raft::is_row_major(index);
-  const bool rowMajorQuery = raft::is_row_major(query);
-
-  raft::spatial::knn::detail::fusedL2Knn(D,
-                                         out_inds.data_handle(),
-                                         out_dists.data_handle(),
-                                         index.data_handle(),
-                                         query.data_handle(),
-                                         n_index_rows,
-                                         n_query_rows,
-                                         k,
-                                         rowMajorIndex,
-                                         rowMajorQuery,
-                                         handle.get_stream(),
-                                         metric);
-}
-
-/** @} */  // end group brute_force_knn
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "brute_force-inl.cuh"
+#endif
 
-}  // namespace raft::neighbors::brute_force
+#ifdef RAFT_COMPILED
+#include "brute_force-ext.cuh"
+#endif
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh
new file mode 100644
index 0000000000..46f72c4005
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>                            // uintX_t
+#include <raft/neighbors/ivf_flat_types.hpp>  // raft::neighbors::ivf_flat::index
+#include <raft/util/raft_explicit.hpp>        // RAFT_EXPLICIT
+#include <rmm/cuda_stream_view.hpp>           // rmm:cuda_stream_view
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+// Only compiled when RAFT_EXPLICIT_INSTANTIATE_ONLY is defined: provides the signature below.
+namespace raft::neighbors::ivf_flat::detail {
+// RAFT_EXPLICIT comes from raft/util/raft_explicit.hpp; NOTE(review): confirm its expansion.
+template <typename T, typename AccT, typename IdxT>
+void ivfflat_interleaved_scan(const raft::neighbors::ivf_flat::index<T, IdxT>& index,
+                              const T* queries,
+                              const uint32_t* coarse_query_results,
+                              const uint32_t n_queries,
+                              const raft::distance::DistanceType metric,
+                              const uint32_t n_probes,
+                              const uint32_t k,
+                              const bool select_min,
+                              IdxT* neighbors,
+                              float* distances,
+                              uint32_t& grid_dim_x,
+                              rmm::cuda_stream_view stream) RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors::ivf_flat::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(T, AccT, IdxT)         \
+  extern template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan<T, AccT, IdxT>( \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,                                        \
+    const T* queries,                                                                              \
+    const uint32_t* coarse_query_results,                                                          \
+    const uint32_t n_queries,                                                                      \
+    const raft::distance::DistanceType metric,                                                     \
+    const uint32_t n_probes,                                                                       \
+    const uint32_t k,                                                                              \
+    const bool select_min,                                                                         \
+    IdxT* neighbors,                                                                               \
+    float* distances,                                                                              \
+    uint32_t& grid_dim_x,                                                                          \
+    rmm::cuda_stream_view stream)
+// extern template declarations for the three (T, AccT, IdxT) combinations listed below.
+instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(float, float, int64_t);
+instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(int8_t, int32_t, int64_t);
+instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(uint8_t, uint32_t, int64_t);
+// The helper macro is only used by the three declarations above.
+#undef instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh
new file mode 100644
index 0000000000..4eed2aa453
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh
@@ -0,0 +1,1076 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/logger.hpp>  // RAFT_LOG_TRACE
+#include <raft/core/operators.hpp>
+#include <raft/distance/distance_types.hpp>
+#include <raft/matrix/detail/select_warpsort.cuh>
+#include <raft/neighbors/ivf_flat_types.hpp>
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+#include <raft/util/cuda_rt_essentials.hpp>  // RAFT_CUDA_TRY
+#include <raft/util/device_loads_stores.cuh>
+#include <raft/util/integer_utils.hpp>
+#include <raft/util/pow2_utils.cuh>
+#include <raft/util/vectorized.cuh>
+#include <rmm/cuda_stream_view.hpp>
+
+namespace raft::neighbors::ivf_flat::detail {
+
+using namespace raft::spatial::knn::detail;  // NOLINT
+
+constexpr int kThreadsPerBlock = 128;
+
+/**
+ * @brief Copy `n` elements from `in` to `out`, sharing the work among the threads of a block.
+ *
+ * Accesses of up to `VecBytes` bytes are used when `in` and `out` have the same alignment offset;
+ * otherwise the access width is halved (down to single elements). No synchronization is done here.
+ *
+ * @param[out] out target pointer (unique per block)
+ * @param[in] in source pointer
+ * @param n number of elements to copy
+ */
+template <int VecBytes = 16, typename T>
+__device__ inline void copy_vectorized(T* out, const T* in, uint32_t n)
+{
+  constexpr int VecElems = VecBytes / sizeof(T);  // NOLINT
+  using align_bytes      = Pow2<(size_t)VecBytes>;
+  if constexpr (VecElems > 1) {
+    using align_elems = Pow2<VecElems>;
+    if (!align_bytes::areSameAlignOffsets(out, in)) {
+      return copy_vectorized<(VecBytes >> 1), T>(out, in, n);
+    }
+    {  // process unaligned head; clamped to `n` so a short copy neither overruns nor wraps `n`
+      const auto gap      = static_cast<uint32_t>(align_bytes::roundUp(in) - in);
+      const uint32_t head = gap < n ? gap : n;
+      if (head > 0) {
+        copy_vectorized<sizeof(T), T>(out, in, head);
+        n -= head;
+        in += head;
+        out += head;
+      }
+    }
+    {  // process main part vectorized
+      using vec_t = typename IOType<T, VecElems>::Type;
+      copy_vectorized<sizeof(vec_t), vec_t>(
+        reinterpret_cast<vec_t*>(out), reinterpret_cast<const vec_t*>(in), align_elems::div(n));
+    }
+    // process unaligned tail: the last `n mod VecElems` elements, copied one by one
+    const uint32_t tail = align_elems::mod(n);
+    if (tail > 0) { copy_vectorized<sizeof(T), T>(out + (n - tail), in + (n - tail), tail); }
+  } else {
+    // base case: one element per access, strided over the threads of the block
+    for (uint32_t i = threadIdx.x; i < n; i += blockDim.x) {
+      out[i] = in[i];
+    }
+  }
+}
+
+/**
+ * @brief Load a part of a vector from the index and from query, compute the (part of the) distance
+ * between them, and aggregate it using the provided Lambda; one structure per thread, per query,
+ * and per index item. The running distance is held by reference and updated in place.
+ *
+ * @tparam kUnroll elements per loop (normally, kUnroll = WarpSize / Veclen)
+ * @tparam Lambda computing the part of the distance for one dimension and aggregating it:
+ *                void (AccT& acc, AccT x, AccT y)
+ * @tparam Veclen size of the vectorized load
+ * @tparam T type of the data in the query and the index
+ * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit
+ * values)
+ */
+template <int kUnroll, typename Lambda, int Veclen, typename T, typename AccT>
+struct loadAndComputeDist {
+  Lambda compute_dist;
+  AccT& dist;  // accumulator owned by the caller
+
+  __device__ __forceinline__ loadAndComputeDist(AccT& dist, Lambda op)
+    : dist(dist), compute_dist(op)
+  {
+  }
+
+  /**
+   * Load parts of vectors from the index and query and accumulate the partial distance.
+   * This version assumes the query is stored in shared memory; `data` itself is not advanced.
+   * Every thread here processes exactly kUnroll * Veclen elements independently of others.
+   */
+  template <typename IdxT>
+  __device__ __forceinline__ void runLoadShmemCompute(const T* const& data,
+                                                      const T* query_shared,
+                                                      IdxT loadIndex,
+                                                      IdxT shmemIndex)
+  {
+#pragma unroll
+    for (int j = 0; j < kUnroll; ++j) {
+      T encV[Veclen];  // Veclen index elements, fetched with one `ldg`
+      ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen);
+      T queryRegs[Veclen];  // the matching Veclen query elements
+      lds(queryRegs, &query_shared[shmemIndex + j * Veclen]);
+#pragma unroll
+      for (int k = 0; k < Veclen; ++k) {
+        compute_dist(dist, queryRegs[k], encV[k]);
+      }
+    }
+  }
+
+  /**
+   * Load parts of vectors from the index and query and accumulate the partial distance.
+   * This version assumes the query is stored in the global memory: each lane loads one query
+   * element at `baseLoadIndex + lane_id` and the values are exchanged between lanes via `shfl`.
+   * `data` is taken by reference and is advanced past the elements consumed here.
+   */
+  template <typename IdxT>
+  __device__ __forceinline__ void runLoadShflAndCompute(const T*& data,
+                                                        const T* query,
+                                                        IdxT baseLoadIndex,
+                                                        const int lane_id)
+  {
+    T queryReg               = query[baseLoadIndex + lane_id];
+    constexpr int stride     = kUnroll * Veclen;  // dimensions covered by one outer iteration
+    constexpr int totalIter  = WarpSize / stride;
+    constexpr int gmemStride = stride * kIndexGroupSize;
+#pragma unroll
+    for (int i = 0; i < totalIter; ++i, data += gmemStride) {
+#pragma unroll
+      for (int j = 0; j < kUnroll; ++j) {
+        T encV[Veclen];
+        ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen);
+        const int d = (i * kUnroll + j) * Veclen;  // source lane of the first query element
+#pragma unroll
+        for (int k = 0; k < Veclen; ++k) {
+          compute_dist(dist, shfl(queryReg, d + k, WarpSize), encV[k]);
+        }
+      }
+    }
+  }
+
+  /**
+   * Load parts of vectors from the index and query and accumulate the partial distance.
+   * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `WarpSize`.
+   */
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
+    const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks)
+  {
+    const int loadDim     = dimBlocks + lane_id;  // the query element held by this lane
+    T queryReg            = loadDim < dim ? query[loadDim] : 0;  // lanes past `dim` hold 0
+    const int loadDataIdx = lane_id * Veclen;
+    for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) {
+      T enc[Veclen];
+      ldg(enc, data + loadDataIdx);
+#pragma unroll
+      for (int k = 0; k < Veclen; k++) {
+        compute_dist(dist, shfl(queryReg, d + k, WarpSize), enc[k]);
+      }
+    }
+  }
+};
+
+// uint8_t with Veclen = 8 or 16: data and query are read as packed 32-bit words (4 elements each).
+template <int kUnroll, typename Lambda, int uint8_veclen>
+struct loadAndComputeDist<kUnroll, Lambda, uint8_veclen, uint8_t, uint32_t> {
+  Lambda compute_dist;  // called with the accumulator and one packed word from each side
+  uint32_t& dist;
+
+  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
+    : dist(dist), compute_dist(op)
+  {
+  }
+
+  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
+                                                      const uint8_t* query_shared,
+                                                      int loadIndex,
+                                                      int shmemIndex)
+  {
+    constexpr int veclen_int = uint8_veclen / 4;  // converting uint8_t veclens to int
+    loadIndex                = loadIndex * veclen_int;  // now counted in 32-bit words
+#pragma unroll
+    for (int j = 0; j < kUnroll; ++j) {
+      uint32_t encV[veclen_int];
+      ldg(encV,
+          reinterpret_cast<unsigned const*>(data) + loadIndex + j * kIndexGroupSize * veclen_int);
+      uint32_t queryRegs[veclen_int];
+      lds(queryRegs, reinterpret_cast<unsigned const*>(query_shared + shmemIndex) + j * veclen_int);
+#pragma unroll
+      for (int k = 0; k < veclen_int; k++) {
+        compute_dist(dist, queryRegs[k], encV[k]);
+      }
+    }
+  }
+  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
+                                                        const uint8_t* query,
+                                                        int baseLoadIndex,
+                                                        const int lane_id)
+  {
+    constexpr int veclen_int = uint8_veclen / 4;  // converting uint8_t veclens to int
+    uint32_t queryReg =  // lanes 0..7 load one word each (8 words = 32 query bytes); others hold 0
+      (lane_id < 8) ? reinterpret_cast<unsigned const*>(query + baseLoadIndex)[lane_id] : 0;
+    constexpr int stride = kUnroll * uint8_veclen;
+    // `data` is taken by reference and advanced by the loop below.
+#pragma unroll
+    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+#pragma unroll
+      for (int j = 0; j < kUnroll; ++j) {
+        uint32_t encV[veclen_int];
+        ldg(encV,
+            reinterpret_cast<unsigned const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
+        const int d = (i * kUnroll + j) * veclen_int;  // source lane of the first query word
+#pragma unroll
+        for (int k = 0; k < veclen_int; ++k) {
+          compute_dist(dist, shfl(queryReg, d + k, WarpSize), encV[k]);
+        }
+      }
+    }
+  }
+
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
+                                                                 const uint8_t* query,
+                                                                 const int lane_id,
+                                                                 const int dim,
+                                                                 const int dimBlocks)
+  {
+    constexpr int veclen_int = uint8_veclen / 4;
+    const int loadDim        = dimBlocks + lane_id * 4;  // 4 = bytes per packed 32-bit word
+    uint32_t queryReg = loadDim < dim ? reinterpret_cast<uint32_t const*>(query + loadDim)[0] : 0;
+    for (int d = 0; d < dim - dimBlocks;
+         d += uint8_veclen, data += kIndexGroupSize * uint8_veclen) {
+      uint32_t enc[veclen_int];
+      ldg(enc, reinterpret_cast<uint32_t const*>(data) + lane_id * veclen_int);
+#pragma unroll
+      for (int k = 0; k < veclen_int; k++) {
+        uint32_t q = shfl(queryReg, (d / 4) + k, WarpSize);  // lane d/4 + k holds that word
+        compute_dist(dist, q, enc[k]);
+      }
+    }
+  }
+};
+
+// Keep this specialized uint8 Veclen = 4, because compiler is generating suboptimal code while
+// using above common template of int2/int4. Each load is one 32-bit word packing four uint8.
+template <int kUnroll, typename Lambda>
+struct loadAndComputeDist<kUnroll, Lambda, 4, uint8_t, uint32_t> {
+  Lambda compute_dist;  // called with the accumulator and one packed word from each side
+  uint32_t& dist;
+
+  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
+    : dist(dist), compute_dist(op)
+  {
+  }
+
+  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
+                                                      const uint8_t* query_shared,
+                                                      int loadIndex,
+                                                      int shmemIndex)
+  {
+#pragma unroll
+    for (int j = 0; j < kUnroll; ++j) {
+      uint32_t encV      = reinterpret_cast<unsigned const*>(data)[loadIndex + j * kIndexGroupSize];
+      uint32_t queryRegs = reinterpret_cast<unsigned const*>(query_shared + shmemIndex)[j];
+      compute_dist(dist, queryRegs, encV);
+    }
+  }
+  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
+                                                        const uint8_t* query,
+                                                        int baseLoadIndex,
+                                                        const int lane_id)
+  {
+    uint32_t queryReg =  // lanes 0..7 load one word each (8 words = 32 query bytes); others hold 0
+      (lane_id < 8) ? reinterpret_cast<unsigned const*>(query + baseLoadIndex)[lane_id] : 0;
+    constexpr int veclen = 4;
+    constexpr int stride = kUnroll * veclen;
+    // `data` is taken by reference and advanced by the loop below.
+#pragma unroll
+    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+#pragma unroll
+      for (int j = 0; j < kUnroll; ++j) {
+        uint32_t encV = reinterpret_cast<unsigned const*>(data)[lane_id + j * kIndexGroupSize];
+        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
+        compute_dist(dist, q, encV);
+      }
+    }
+  }
+  // Handles the dimensions in [dimBlocks, dim); lane L holds the query word at byte dimBlocks+4L.
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
+                                                                 const uint8_t* query,
+                                                                 const int lane_id,
+                                                                 const int dim,
+                                                                 const int dimBlocks)
+  {
+    constexpr int veclen = 4;
+    const int loadDim    = dimBlocks + lane_id * veclen;  // byte offset, not a word index
+    uint32_t queryReg    = loadDim < dim ? *reinterpret_cast<unsigned const*>(query + loadDim) : 0;
+    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
+      uint32_t enc = reinterpret_cast<unsigned const*>(data)[lane_id];
+      uint32_t q   = shfl(queryReg, d / veclen, WarpSize);  // word for dims dimBlocks + d .. + 3
+      compute_dist(dist, q, enc);
+    }
+  }
+};
+
+template <int kUnroll, typename Lambda>
+struct loadAndComputeDist<kUnroll, Lambda, 2, uint8_t, uint32_t> {  // two uint8 per 16-bit load
+  Lambda compute_dist;  // called with the accumulator and one widened 16-bit pair from each side
+  uint32_t& dist;
+
+  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
+    : dist(dist), compute_dist(op)
+  {
+  }
+
+  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
+                                                      const uint8_t* query_shared,
+                                                      int loadIndex,
+                                                      int shmemIndex)
+  {
+#pragma unroll
+    for (int j = 0; j < kUnroll; ++j) {
+      uint32_t encV      = reinterpret_cast<uint16_t const*>(data)[loadIndex + j * kIndexGroupSize];
+      uint32_t queryRegs = reinterpret_cast<uint16_t const*>(query_shared + shmemIndex)[j];
+      compute_dist(dist, queryRegs, encV);
+    }
+  }
+
+  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
+                                                        const uint8_t* query,
+                                                        int baseLoadIndex,
+                                                        const int lane_id)
+  {
+    uint32_t queryReg =  // lanes 0..15 load one pair each (16 pairs = 32 query bytes); others 0
+      (lane_id < 16) ? reinterpret_cast<uint16_t const*>(query + baseLoadIndex)[lane_id] : 0;
+    constexpr int veclen = 2;
+    constexpr int stride = kUnroll * veclen;
+    // `data` is taken by reference and advanced by the loop below.
+#pragma unroll
+    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+#pragma unroll
+      for (int j = 0; j < kUnroll; ++j) {
+        uint32_t encV = reinterpret_cast<uint16_t const*>(data)[lane_id + j * kIndexGroupSize];
+        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
+        compute_dist(dist, q, encV);
+      }
+    }
+  }
+
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
+                                                                 const uint8_t* query,
+                                                                 const int lane_id,
+                                                                 const int dim,
+                                                                 const int dimBlocks)
+  {
+    constexpr int veclen = 2;
+    int loadDim          = dimBlocks + lane_id * veclen;  // byte offset of this lane's pair
+    uint32_t queryReg = loadDim < dim ? reinterpret_cast<uint16_t const*>(query + loadDim)[0] : 0;
+    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
+      uint32_t enc = reinterpret_cast<uint16_t const*>(data)[lane_id];
+      uint32_t q   = shfl(queryReg, d / veclen, WarpSize);  // lane d/2 holds the matching pair
+      compute_dist(dist, q, enc);
+    }
+  }
+};
+
+// uint8_t with Veclen = 1: every load is a single byte, widened to uint32_t before it is passed
+// to `compute_dist`; the accumulator `dist` is held by reference and updated in place.
+template <int kUnroll, typename Lambda>
+struct loadAndComputeDist<kUnroll, Lambda, 1, uint8_t, uint32_t> {
+  Lambda compute_dist;
+  uint32_t& dist;
+
+  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
+    : compute_dist(op), dist(dist) {}
+
+  // Accumulate kUnroll dimensions, reading the query from `query_shared` at `shmemIndex`.
+  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
+                                                      const uint8_t* query_shared,
+                                                      int loadIndex,
+                                                      int shmemIndex)
+  {
+#pragma unroll
+    for (int u = 0; u < kUnroll; ++u) {
+      uint32_t stored = data[loadIndex + u * kIndexGroupSize];
+      uint32_t wanted = query_shared[shmemIndex + u];
+      compute_dist(dist, wanted, stored);
+    }
+  }
+
+  // Each lane loads one query element; the values are passed between lanes with `shfl`.
+  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
+                                                        const uint8_t* query,
+                                                        int baseLoadIndex,
+                                                        const int lane_id)
+  {
+    uint32_t lane_query   = query[baseLoadIndex + lane_id];
+    constexpr int kPasses = WarpSize / kUnroll;
+#pragma unroll
+    for (int pass = 0; pass < kPasses; ++pass, data += kUnroll * kIndexGroupSize) {
+#pragma unroll
+      for (int u = 0; u < kUnroll; ++u) {
+        uint32_t stored = data[lane_id + u * kIndexGroupSize];
+        uint32_t wanted = shfl(lane_query, pass * kUnroll + u, WarpSize);
+        compute_dist(dist, wanted, stored);
+      }
+    }
+  }
+
+  // Same exchange scheme for the dimensions in [dimBlocks, dim); lanes past `dim` hold 0.
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
+                                                                 const uint8_t* query,
+                                                                 const int lane_id,
+                                                                 const int dim,
+                                                                 const int dimBlocks)
+  {
+    const int my_dim    = dimBlocks + lane_id;
+    uint32_t lane_query = my_dim < dim ? query[my_dim] : 0;
+    for (int offset = 0; offset < dim - dimBlocks; ++offset, data += kIndexGroupSize) {
+      uint32_t stored = data[lane_id];
+      uint32_t wanted = shfl(lane_query, offset, WarpSize);
+      compute_dist(dist, wanted, stored);
+    }
+  }
+};
+
+// int8_t with Veclen = 4, 8 or 16: data and query are read as packed 32-bit words (4 elements).
+template <int kUnroll, typename Lambda, int int8_veclen>
+struct loadAndComputeDist<kUnroll, Lambda, int8_veclen, int8_t, int32_t> {
+  Lambda compute_dist;  // called with the accumulator and one packed word from each side
+  int32_t& dist;
+
+  __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op)
+    : dist(dist), compute_dist(op)
+  {
+  }
+
+  __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data,
+                                                      const int8_t* query_shared,
+                                                      int loadIndex,
+                                                      int shmemIndex)
+  {
+    constexpr int veclen_int = int8_veclen / 4;  // converting int8_t veclens to int
+
+#pragma unroll
+    for (int j = 0; j < kUnroll; ++j) {
+      int32_t encV[veclen_int];
+      ldg(encV,
+          reinterpret_cast<int32_t const*>(data) + (loadIndex + j * kIndexGroupSize) * veclen_int);
+      int32_t queryRegs[veclen_int];
+      lds(queryRegs, reinterpret_cast<int32_t const*>(query_shared + shmemIndex) + j * veclen_int);
+#pragma unroll
+      for (int k = 0; k < veclen_int; k++) {
+        compute_dist(dist, queryRegs[k], encV[k]);
+      }
+    }
+  }
+
+  __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data,
+                                                        const int8_t* query,
+                                                        int baseLoadIndex,
+                                                        const int lane_id)
+  {
+    constexpr int veclen_int = int8_veclen / 4;  // converting int8_t veclens to int
+    // Lanes 0..7 load one word each (8 words = 32 query bytes); the other lanes hold 0.
+    int32_t queryReg =
+      (lane_id < 8) ? reinterpret_cast<int32_t const*>(query + baseLoadIndex)[lane_id] : 0;
+    constexpr int stride = kUnroll * int8_veclen;
+    // `data` is taken by reference and advanced by the loop below.
+#pragma unroll
+    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+#pragma unroll
+      for (int j = 0; j < kUnroll; ++j) {
+        int32_t encV[veclen_int];
+        ldg(encV,
+            reinterpret_cast<int32_t const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
+        const int d = (i * kUnroll + j) * veclen_int;  // source lane of the first query word
+#pragma unroll
+        for (int k = 0; k < veclen_int; ++k) {
+          int32_t q = shfl(queryReg, d + k, WarpSize);
+          compute_dist(dist, q, encV[k]);
+        }
+      }
+    }
+  }
+
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
+    const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks)
+  {
+    constexpr int veclen_int = int8_veclen / 4;
+    const int loadDim        = dimBlocks + lane_id * 4;  // 4 = bytes per packed 32-bit word
+    int32_t queryReg = loadDim < dim ? reinterpret_cast<int32_t const*>(query + loadDim)[0] : 0;
+    for (int d = 0; d < dim - dimBlocks; d += int8_veclen, data += kIndexGroupSize * int8_veclen) {
+      int32_t enc[veclen_int];
+      ldg(enc, reinterpret_cast<int32_t const*>(data) + lane_id * veclen_int);
+#pragma unroll
+      for (int k = 0; k < veclen_int; k++) {
+        int32_t q = shfl(queryReg, (d / 4) + k, WarpSize);  // lane d/4 + k holds that word
+        compute_dist(dist, q, enc[k]);
+      }
+    }
+  }
+};
+
+template <int kUnroll, typename Lambda>
+struct loadAndComputeDist<kUnroll, Lambda, 2, int8_t, int32_t> {  // two int8 per 16-bit load
+  Lambda compute_dist;  // gets each pair in the low 16 bits (loaded as uint16_t, zero-extended)
+  int32_t& dist;
+  __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op)
+    : dist(dist), compute_dist(op)
+  {
+  }
+  __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data,
+                                                      const int8_t* query_shared,
+                                                      int loadIndex,
+                                                      int shmemIndex)
+  {
+#pragma unroll
+    for (int j = 0; j < kUnroll; ++j) {
+      int32_t encV      = reinterpret_cast<uint16_t const*>(data)[loadIndex + j * kIndexGroupSize];
+      int32_t queryRegs = reinterpret_cast<uint16_t const*>(query_shared + shmemIndex)[j];
+      compute_dist(dist, queryRegs, encV);
+    }
+  }
+
+  __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data,
+                                                        const int8_t* query,
+                                                        int baseLoadIndex,
+                                                        const int lane_id)
+  {
+    int32_t queryReg =  // lanes 0..15 load one pair each (16 pairs = 32 query bytes); others 0
+      (lane_id < 16) ? reinterpret_cast<uint16_t const*>(query + baseLoadIndex)[lane_id] : 0;
+    constexpr int veclen = 2;
+    constexpr int stride = kUnroll * veclen;
+    // `data` is taken by reference and advanced by the loop below.
+#pragma unroll
+    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
+#pragma unroll
+      for (int j = 0; j < kUnroll; ++j) {
+        int32_t encV = reinterpret_cast<uint16_t const*>(data)[lane_id + j * kIndexGroupSize];
+        int32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
+        compute_dist(dist, q, encV);
+      }
+    }
+  }
+
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
+    const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks)
+  {
+    constexpr int veclen = 2;
+    int loadDim          = dimBlocks + lane_id * veclen;  // byte offset of this lane's pair
+    int32_t queryReg = loadDim < dim ? reinterpret_cast<uint16_t const*>(query + loadDim)[0] : 0;
+    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
+      int32_t enc = reinterpret_cast<uint16_t const*>(data + lane_id * veclen)[0];
+      int32_t q   = shfl(queryReg, d / veclen, WarpSize);  // lane d/2 holds the matching pair
+      compute_dist(dist, q, enc);
+    }
+  }
+};
+
+// int8_t with Veclen = 1: elements are read one byte at a time and handed to `compute_dist`.
+template <int kUnroll, typename Lambda>
+struct loadAndComputeDist<kUnroll, Lambda, 1, int8_t, int32_t> {
+  Lambda compute_dist;
+  int32_t& dist;
+  __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op)
+    : compute_dist(op), dist(dist) {}
+
+  // Accumulate kUnroll dimensions, reading the query from `query_shared` at `shmemIndex`.
+  __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data,
+                                                      const int8_t* query_shared,
+                                                      int loadIndex,
+                                                      int shmemIndex)
+  {
+#pragma unroll
+    for (int u = 0; u < kUnroll; ++u) {
+      const int8_t& query_elem = query_shared[shmemIndex + u];
+      const int8_t& index_elem = data[loadIndex + u * kIndexGroupSize];
+      compute_dist(dist, query_elem, index_elem);
+    }
+  }
+  // Each lane loads one query element; the values are passed between lanes with `shfl`.
+  __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data,
+                                                        const int8_t* query,
+                                                        int baseLoadIndex,
+                                                        const int lane_id)
+  {
+    const int32_t lane_query = query[baseLoadIndex + lane_id];
+    constexpr int kPasses    = WarpSize / kUnroll;
+#pragma unroll
+    for (int pass = 0; pass < kPasses; ++pass, data += kUnroll * kIndexGroupSize) {
+#pragma unroll
+      for (int u = 0; u < kUnroll; ++u) {
+        const int8_t& index_elem = data[lane_id + u * kIndexGroupSize];
+        compute_dist(dist, shfl(lane_query, pass * kUnroll + u, WarpSize), index_elem);
+      }
+    }
+  }
+  // Same exchange scheme for the dimensions in [dimBlocks, dim); lanes past `dim` hold 0.
+  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
+    const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks)
+  {
+    const int my_dim         = dimBlocks + lane_id;
+    const int32_t lane_query = my_dim < dim ? query[my_dim] : 0;
+    for (int offset = 0; offset < dim - dimBlocks; ++offset, data += kIndexGroupSize) {
+      compute_dist(dist, shfl(lane_query, offset, WarpSize), data[lane_id]);
+    }
+  }
+};
+
+/**
+ * Scan clusters for nearest neighbors of the query vectors.
+ * See `ivfflat_interleaved_scan` for more information.
+ *
+ * The clusters are stored in the interleaved index format described in ivf_flat_types.hpp.
+ * For each query vector, a set of clusters is probed: the distance to each vector in the cluster is
+ * calculated, and the top-k nearest neighbors are selected.
+ *
+ * @param compute_dist distance function
+ * @param post_process functor handed to the queue when the selected distances are stored
+ * @param query_smem_elems number of dimensions of the query vector to fit in a shared memory of a
+ * block; this number must be a multiple of `WarpSize * Veclen`.
+ * @param[in] query a pointer to all queries in a row-major contiguous format [gridDim.y, dim]
+ * @param[in] coarse_index a pointer to the cluster indices to search through [gridDim.y, n_probes]
+ * @param[in] list_indices_ptrs per-cluster pointers to the ids that are enqueued with the distances
+ * @param[in] list_data_ptrs per-cluster pointers to the interleaved vector data
+ * @param[in] list_sizes the number of vectors in each cluster
+ * @param n_probes number of clusters scanned for each query
+ * @param k size of the selection queue (number of neighbors kept)
+ * @param dim number of elements in one vector
+ * @param[out] neighbors selected ids, laid out as [gridDim.y, gridDim.x, k]
+ * @param[out] distances selected distances, laid out like `neighbors`
+ */
+template <int Capacity,
+          int Veclen,
+          bool Ascending,
+          typename T,
+          typename AccT,
+          typename IdxT,
+          typename Lambda,
+          typename PostLambda>
+__global__ void __launch_bounds__(kThreadsPerBlock)
+  interleaved_scan_kernel(Lambda compute_dist,
+                          PostLambda post_process,
+                          const uint32_t query_smem_elems,
+                          const T* query,
+                          const uint32_t* coarse_index,
+                          const IdxT* const* list_indices_ptrs,
+                          const T* const* list_data_ptrs,
+                          const uint32_t* list_sizes,
+                          const uint32_t n_probes,
+                          const uint32_t k,
+                          const uint32_t dim,
+                          IdxT* neighbors,
+                          float* distances)
+{
+  extern __shared__ __align__(256) uint8_t interleaved_scan_kernel_smem[];
+  // Using shared memory for the (part of the) query;
+  // This allows to save on global memory bandwidth when reading index and query
+  // data at the same time.
+  // Its size is `query_smem_elems`.
+  T* query_shared = reinterpret_cast<T*>(interleaved_scan_kernel_smem);
+  // Offset the query, the coarse index, and both outputs to the part handled by this block
+  {
+    const int query_id = blockIdx.y;
+    query += query_id * dim;  // NOTE(review): 32-bit offset arithmetic - confirm it cannot overflow
+    neighbors += query_id * k * gridDim.x + blockIdx.x * k;
+    distances += query_id * k * gridDim.x + blockIdx.x * k;
+    coarse_index += query_id * n_probes;
+  }
+
+  // Copy a part of the query into shared memory for faster processing
+  copy_vectorized(query_shared, query, std::min(dim, query_smem_elems));
+  __syncthreads();  // the copy must be complete before any thread reads query_shared
+
+  using block_sort_t = matrix::detail::select::warpsort::block_sort<
+    matrix::detail::select::warpsort::warp_sort_filtered,
+    Capacity,
+    Ascending,
+    float,
+    IdxT>;
+  block_sort_t queue(k);
+
+  {
+    using align_warp  = Pow2<WarpSize>;
+    const int lane_id = align_warp::mod(threadIdx.x);
+
+    // The largest multiple of WarpSize not exceeding dim (dimensions handled without remainder)
+    const uint32_t full_warps_along_dim = align_warp::roundDown(dim);
+    // Number of leading dimensions served from query_shared by the first loop below
+    const uint32_t shm_assisted_dim =
+      (dim > query_smem_elems) ? query_smem_elems : full_warps_along_dim;
+
+    // Every CUDA block scans one cluster at a time.
+    for (int probe_id = blockIdx.x; probe_id < n_probes; probe_id += gridDim.x) {
+      const uint32_t list_id = coarse_index[probe_id];  // The id of cluster(list)
+
+      // The number of vectors in each cluster(list); [nlist]
+      const uint32_t list_length = list_sizes[list_id];
+
+      // The number of interleaved groups to be processed
+      const uint32_t num_groups =
+        align_warp::div(list_length + align_warp::Mask);  // ceildiv by power of 2
+
+      constexpr int kUnroll        = WarpSize / Veclen;
+      constexpr uint32_t kNumWarps = kThreadsPerBlock / WarpSize;
+      // Every warp reads WarpSize vectors and computes the distances to them.
+      // Then, the distances and corresponding ids are distributed among the threads,
+      // and each thread adds one (id, dist) pair to the filtering queue.
+      for (uint32_t group_id = align_warp::div(threadIdx.x); group_id < num_groups;
+           group_id += kNumWarps) {
+        AccT dist = 0;
+        // This is where this warp begins reading data (start position of an interleaved group)
+        const T* data = list_data_ptrs[list_id] + (group_id * kIndexGroupSize) * dim;
+
+        // This is the vector a given lane/thread handles
+        const uint32_t vec_id = group_id * WarpSize + lane_id;
+        const bool valid      = vec_id < list_length;
+
+        // Process first shm_assisted_dim dimensions (always using shared memory)
+        if (valid) {
+          loadAndComputeDist<kUnroll, decltype(compute_dist), Veclen, T, AccT> lc(dist,
+                                                                                  compute_dist);
+          for (int pos = 0; pos < shm_assisted_dim;
+               pos += WarpSize, data += kIndexGroupSize * WarpSize) {
+            lc.runLoadShmemCompute(data, query_shared, lane_id, pos);
+          }
+        }
+
+        if (dim > query_smem_elems) {
+          // The default path - using shfl ops - for dimensions beyond query_smem_elems (all lanes)
+          loadAndComputeDist<kUnroll, decltype(compute_dist), Veclen, T, AccT> lc(dist,
+                                                                                  compute_dist);
+          for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += WarpSize) {
+            lc.runLoadShflAndCompute(data, query, pos, lane_id);
+          }
+          lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim);
+        } else {
+          // here shm_assisted_dim == full_warps_along_dim <= dim: finish from shared memory
+          if (valid) {
+            loadAndComputeDist<1, decltype(compute_dist), Veclen, T, AccT> lc(dist, compute_dist);
+            for (int pos = full_warps_along_dim; pos < dim;
+                 pos += Veclen, data += kIndexGroupSize * Veclen) {
+              lc.runLoadShmemCompute(data, query_shared, lane_id, pos);
+            }
+          }
+        }
+
+        // Enqueue one element per thread (lanes without a vector enqueue the dummy value)
+        const float val  = valid ? static_cast<float>(dist) : block_sort_t::queue_t::kDummy;
+        const size_t idx = valid ? static_cast<size_t>(list_indices_ptrs[list_id][vec_id]) : 0;
+        queue.add(val, idx);
+      }
+    }
+  }
+
+  // All threads are done with query_shared; the same shared buffer is handed to the queue below
+  __syncthreads();
+  queue.done(interleaved_scan_kernel_smem);
+  queue.store(distances, neighbors, post_process);
+}
+
+/**
+ *  Configure gridDim.x to fill the GPU while limiting the output size; the result is >= 1.
+ */
+template <typename T>
+uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, T func)
+{
+  int dev_id;
+  RAFT_CUDA_TRY(cudaGetDevice(&dev_id));
+  int num_sms;
+  RAFT_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
+  int num_blocks_per_sm = 0;
+  RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+    &num_blocks_per_sm, func, kThreadsPerBlock, sMemSize));
+  // Clamp to >= 1: callers use 0 as "not configured yet"; also guards the division below.
+  size_t min_grid_size = static_cast<size_t>(num_sms) * num_blocks_per_sm;
+  size_t min_grid_x    = ceildiv<size_t>(min_grid_size, std::max<uint32_t>(numQueries, 1u));
+  return static_cast<uint32_t>(std::max<size_t>(1, std::min<size_t>(min_grid_x, n_probes)));
+}
+// Launch the interleaved scan kernel, or, if `grid_dim_x == 0`, only compute `grid_dim_x`.
+template <int Capacity,
+          int Veclen,
+          bool Ascending,
+          typename T,
+          typename AccT,
+          typename IdxT,
+          typename Lambda,
+          typename PostLambda>
+void launch_kernel(Lambda lambda,
+                   PostLambda post_process,
+                   const index<T, IdxT>& index,
+                   const T* queries,
+                   const uint32_t* coarse_index,
+                   const uint32_t num_queries,
+                   const uint32_t n_probes,
+                   const uint32_t k,
+                   IdxT* neighbors,
+                   float* distances,
+                   uint32_t& grid_dim_x,
+                   rmm::cuda_stream_view stream)
+{
+  RAFT_EXPECTS(Veclen == index.veclen(),
+               "Configured Veclen does not match the index interleaving pattern.");
+  constexpr auto kKernel =
+    interleaved_scan_kernel<Capacity, Veclen, Ascending, T, AccT, IdxT, Lambda, PostLambda>;
+  const int max_query_smem = 16384;  // cap (bytes) used when sizing query_smem_elems
+  int query_smem_elems =
+    std::min<int>(max_query_smem / sizeof(T), Pow2<Veclen * WarpSize>::roundUp(index.dim()));
+  int smem_size              = query_smem_elems * sizeof(T);
+  constexpr int kSubwarpSize = std::min<int>(Capacity, WarpSize);
+  auto block_merge_mem =
+    raft::matrix::detail::select::warpsort::calc_smem_size_for_block_wide<AccT, IdxT>(
+      kThreadsPerBlock / kSubwarpSize, k);
+  smem_size += std::max<int>(smem_size, block_merge_mem);  // NOTE(review): `+=` or `=`? confirm
+
+  // power-of-two less than cuda limit (for better addr alignment)
+  constexpr uint32_t kMaxGridY = 32768;
+  // Configuration-only call: report the chosen grid_dim_x to the caller and skip the launch.
+  if (grid_dim_x == 0) {
+    grid_dim_x = configure_launch_x(std::min(kMaxGridY, num_queries), n_probes, smem_size, kKernel);
+    return;
+  }
+  // gridDim.y is capped at kMaxGridY, so the queries are processed in consecutive batches.
+  for (uint32_t query_offset = 0; query_offset < num_queries; query_offset += kMaxGridY) {
+    uint32_t grid_dim_y = std::min<uint32_t>(kMaxGridY, num_queries - query_offset);
+    dim3 grid_dim(grid_dim_x, grid_dim_y, 1);
+    dim3 block_dim(kThreadsPerBlock);
+    RAFT_LOG_TRACE(
+      "Launching the ivf-flat interleaved_scan_kernel (%d, %d, 1) x (%d, 1, 1), n_probes = %d, "
+      "smem_size = %d",
+      grid_dim.x,
+      grid_dim.y,
+      block_dim.x,
+      n_probes,
+      smem_size);
+    kKernel<<<grid_dim, block_dim, smem_size, stream>>>(lambda,
+                                                        post_process,
+                                                        query_smem_elems,
+                                                        queries,
+                                                        coarse_index,
+                                                        index.inds_ptrs().data_handle(),
+                                                        index.data_ptrs().data_handle(),
+                                                        index.list_sizes().data_handle(),
+                                                        n_probes,
+                                                        k,
+                                                        index.dim(),
+                                                        neighbors,
+                                                        distances);
+    queries += grid_dim_y * index.dim();       // advance past the queries of this batch
+    neighbors += grid_dim_y * grid_dim_x * k;  // advance by grid_dim_x * k entries per query
+    distances += grid_dim_y * grid_dim_x * k;  // same stride as `neighbors`
+  }
+}
+
+template <int Veclen, typename T, typename AccT>
+struct euclidean_dist {
+  /** Add the squared difference of `x` and `y` to the running sum `acc`. */
+  __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y)
+  {
+    acc += (x - y) * (x - y);
+  }
+};
+
+template <int Veclen>
+struct euclidean_dist<Veclen, uint8_t, uint32_t> {
+  __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y)
+  {
+    if constexpr (Veclen <= 1) {
+      const auto abs_delta = __usad(x, y, 0u);  // scalar path: |x - y| + 0
+      acc += abs_delta * abs_delta;
+    } else {
+      const auto packed_abs_delta = __vabsdiffu4(x, y);  // per-byte |x - y| (CUDA SIMD intrinsic)
+      acc                         = dp4a(packed_abs_delta, packed_abs_delta, acc);
+    }
+  }
+};
+
+template <int Veclen>
+struct euclidean_dist<Veclen, int8_t, int32_t> {
+  __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y)
+  {
+    if constexpr (Veclen <= 1) {
+      const auto delta = x - y;
+      acc += delta * delta;
+    } else {
+      // The absolute difference of two int8 values can exceed 127, i.e. it does not fit into int8
+      // and would look negative if reinterpreted as signed. Hence the accumulator is cast to
+      // uint32_t below to enforce the unsigned version of dp4a: widening uint8 to uint32 is
+      // correct, whereas widening int8 to int32 would yield incorrect results.
+      const auto packed_abs_delta = __vabsdiffs4(x, y);
+      acc = dp4a(packed_abs_delta, packed_abs_delta, static_cast<uint32_t>(acc));
+    }
+  }
+};
+
+template <int Veclen, typename T, typename AccT>
+struct inner_prod_dist {
+  __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y)
+  {
+    if constexpr (Veclen <= 1 || !(std::is_same_v<T, int8_t> || std::is_same_v<T, uint8_t>)) {
+      acc += x * y;  // plain multiply-accumulate
+    } else {
+      acc = dp4a(x, y, acc);  // 8-bit element types with Veclen > 1 go through dp4a
+    }
+  }
+};
+
+/** Map `metric` to a distance functor and a post-processing op, then forward to launch_kernel. */
+template <int Capacity,
+          int Veclen,
+          bool Ascending,
+          typename T,
+          typename AccT,
+          typename IdxT,
+          typename... Args>
+void launch_with_fixed_consts(raft::distance::DistanceType metric, Args&&... args)
+{
+  switch (metric) {
+    case raft::distance::DistanceType::L2Expanded:
+    case raft::distance::DistanceType::L2Unexpanded:
+      return launch_kernel<Capacity,  // sum of squared differences, raft::identity_op afterwards
+                           Veclen,
+                           Ascending,
+                           T,
+                           AccT,
+                           IdxT,
+                           euclidean_dist<Veclen, T, AccT>,
+                           raft::identity_op>({}, {}, std::forward<Args>(args)...);
+    case raft::distance::DistanceType::L2SqrtExpanded:
+    case raft::distance::DistanceType::L2SqrtUnexpanded:
+      return launch_kernel<Capacity,  // sum of squared differences, raft::sqrt_op afterwards
+                           Veclen,
+                           Ascending,
+                           T,
+                           AccT,
+                           IdxT,
+                           euclidean_dist<Veclen, T, AccT>,
+                           raft::sqrt_op>({}, {}, std::forward<Args>(args)...);
+    case raft::distance::DistanceType::InnerProduct:
+      return launch_kernel<Capacity,  // sum of products, raft::identity_op afterwards
+                           Veclen,
+                           Ascending,
+                           T,
+                           AccT,
+                           IdxT,
+                           inner_prod_dist<Veclen, T, AccT>,
+                           raft::identity_op>({}, {}, std::forward<Args>(args)...);
+    // NB: update the description of `knn::ivf_flat::build` when adding here a new metric.
+    default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric));
+  }
+}
+
+/**
+ * Lift the `capacity` and `veclen` parameters to the template level,
+ * forward the rest of the arguments unmodified to `launch_with_fixed_consts`.
+ */
+template <typename T,
+          typename AccT,
+          typename IdxT,
+          int Capacity = matrix::detail::select::warpsort::kMaxCapacity,
+          int Veclen   = std::max<int>(1, 16 / sizeof(T))>
+struct select_interleaved_scan_kernel {
+  /**
+   * Recursively reduce `Capacity` (halved while `capacity * 2 <= Capacity`) and `Veclen`
+   * (dropped straight to 1 when the runtime `veclen` is not a multiple of it) until they match
+   * the runtime arguments. By default, the recursion starts with the maximum possible values.
+   * `select_min` selects the `Ascending` template flag of the launched kernel.
+   */
+  template <typename... Args>
+  static inline void run(int capacity, int veclen, bool select_min, Args&&... args)
+  {
+    if constexpr (Capacity > 1) {
+      if (capacity * 2 <= Capacity) {  // the requested capacity fits into half of `Capacity`
+        return select_interleaved_scan_kernel<T, AccT, IdxT, Capacity / 2, Veclen>::run(
+          capacity, veclen, select_min, std::forward<Args>(args)...);
+      }
+    }
+    if constexpr (Veclen > 1) {
+      if (veclen % Veclen != 0) {  // NB: the runtime veclen is replaced by 1 in the call below
+        return select_interleaved_scan_kernel<T, AccT, IdxT, Capacity, 1>::run(
+          capacity, 1, select_min, std::forward<Args>(args)...);
+      }
+    }
+    // NB: this is the limitation of the warpsort structures that use a huge number of
+    //     registers (used in the main kernel here).
+    RAFT_EXPECTS(capacity == Capacity,
+                 "Capacity must be power-of-two not bigger than the maximum allowed size "
+                 "matrix::detail::select::warpsort::kMaxCapacity (%d).",
+                 matrix::detail::select::warpsort::kMaxCapacity);
+    RAFT_EXPECTS(
+      veclen == Veclen,
+      "Veclen must be power-of-two not bigger than the maximum allowed size for this data type.");
+    if (select_min) {  // `select_min` becomes the compile-time `Ascending` flag
+      launch_with_fixed_consts<Capacity, Veclen, true, T, AccT, IdxT>(std::forward<Args>(args)...);
+    } else {
+      launch_with_fixed_consts<Capacity, Veclen, false, T, AccT, IdxT>(std::forward<Args>(args)...);
+    }
+  }
+};
+
+/**
+ * @brief Configure and launch an appropriate template instance of the interleaved scan kernel.
+ *
+ * @tparam T value type
+ * @tparam AccT accumulated type
+ * @tparam IdxT type of the indices
+ *
+ * @param[in] index previously built ivf-flat index
+ * @param[in] queries device pointer to the query vectors [batch_size, dim]
+ * @param[in] coarse_query_results device pointer to the cluster (list) ids [batch_size, n_probes]
+ * @param n_queries batch size
+ * @param metric type of the measured distance
+ * @param n_probes number of nearest clusters to query
+ * @param k number of nearest neighbors.
+ *            NB: the maximum value of `k` is limited statically by `kMaxCapacity`.
+ * @param select_min whether to select nearest (true) or furthest (false) points w.r.t. the given
+ * metric.
+ * @param[out] neighbors device pointer to the result indices for each query and cluster
+ * [batch_size, grid_dim_x, k]
+ * @param[out] distances device pointer to the result distances for each query and cluster
+ * [batch_size, grid_dim_x, k]
+ * @param[inout] grid_dim_x number of blocks launched across all n_probes clusters (one block
+ *               handles one or more probes). If 0 on entry, it is only computed; nothing is run.
+ * @param stream CUDA stream on which the kernel is launched
+ */
+template <typename T, typename AccT, typename IdxT>
+void ivfflat_interleaved_scan(const index<T, IdxT>& index,
+                              const T* queries,
+                              const uint32_t* coarse_query_results,
+                              const uint32_t n_queries,
+                              const raft::distance::DistanceType metric,
+                              const uint32_t n_probes,
+                              const uint32_t k,
+                              const bool select_min,
+                              IdxT* neighbors,
+                              float* distances,
+                              uint32_t& grid_dim_x,
+                              rmm::cuda_stream_view stream)
+{
+  const int capacity = bound_by_power_of_two(k);  // presumably rounds k up to a power of two
+  select_interleaved_scan_kernel<T, AccT, IdxT>::run(capacity,
+                                                     index.veclen(),
+                                                     select_min,
+                                                     metric,
+                                                     index,
+                                                     queries,
+                                                     coarse_query_results,
+                                                     n_queries,
+                                                     n_probes,
+                                                     k,
+                                                     neighbors,
+                                                     distances,
+                                                     grid_dim_x,
+                                                     stream);
+}
+
+}  // namespace raft::neighbors::ivf_flat::detail
diff --git a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh
similarity index 76%
rename from cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu
rename to cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh
index 7c80eb29d0..63f341dd9a 100644
--- a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_double.cu
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh
@@ -14,7 +14,12 @@
  * limitations under the License.
  */
 
-#include <raft/distance/detail/kernels/gram_matrix.cuh>
-#include <raft/distance/specializations.cuh>
+#pragma once
 
-template class raft::distance::kernels::detail::GramMatrixBase<double>;
\ No newline at end of file
+#if !defined(RAFT_EXPLICIT_INSTANTIATE_ONLY)
+#include "ivf_flat_interleaved_scan-inl.cuh"
+#endif
+
+#ifdef RAFT_COMPILED
+#include "ivf_flat_interleaved_scan-ext.cuh"
+#endif
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh
new file mode 100644
index 0000000000..14d15711a6
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>                            // uintX_t
+#include <raft/neighbors/ivf_flat_types.hpp>  // raft::neighbors::ivf_flat::index
+#include <raft/util/raft_explicit.hpp>        // RAFT_EXPLICIT
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::neighbors::ivf_flat::detail {
+// Declaration-only counterpart of the `search` template defined in ivf_flat_search-inl.cuh.
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const raft::neighbors::ivf_flat::index<T, IdxT>& index,
+            const T* queries,
+            uint32_t n_queries,
+            uint32_t k,
+            IdxT* neighbors,
+            float* distances,
+            rmm::mr::device_memory_resource* mr = nullptr) RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors::ivf_flat::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+// Emit `extern template` declarations of detail::search for the listed (T, IdxT) pairs.
+#define instantiate_raft_neighbors_ivf_flat_detail_search(T, IdxT)         \
+  extern template void raft::neighbors::ivf_flat::detail::search<T, IdxT>( \
+    raft::device_resources const& handle,                                  \
+    const search_params& params,                                           \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,                \
+    const T* queries,                                                      \
+    uint32_t n_queries,                                                    \
+    uint32_t k,                                                            \
+    IdxT* neighbors,                                                       \
+    float* distances,                                                      \
+    rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_neighbors_ivf_flat_detail_search(float, int64_t);    // T = float
+instantiate_raft_neighbors_ivf_flat_detail_search(int8_t, int64_t);   // T = int8_t
+instantiate_raft_neighbors_ivf_flat_detail_search(uint8_t, int64_t);  // T = uint8_t
+
+#undef instantiate_raft_neighbors_ivf_flat_detail_search
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
new file mode 100644
index 0000000000..89a4597acf
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/device_resources.hpp>                       // raft::device_resources
+#include <raft/core/logger.hpp>                                 // RAFT_LOG_TRACE
+#include <raft/distance/distance_types.hpp>                     // is_min_close, DistanceType
+#include <raft/linalg/gemm.cuh>                                 // raft::linalg::gemm
+#include <raft/linalg/norm.cuh>                                 // raft::linalg::norm
+#include <raft/linalg/unary_op.cuh>                             // raft::linalg::unary_op
+#include <raft/matrix/detail/select_k.cuh>                      // matrix::detail::select_k
+#include <raft/neighbors/detail/ivf_flat_interleaved_scan.cuh>  // interleaved_scan
+#include <raft/neighbors/ivf_flat_types.hpp>                    // raft::neighbors::ivf_flat::index
+#include <raft/spatial/knn/detail/ann_utils.cuh>                // utils::mapping
+#include <rmm/mr/device/per_device_resource.hpp>                // rmm::device_memory_resource
+
+namespace raft::neighbors::ivf_flat::detail {
+
+using namespace raft::spatial::knn::detail;  // NOLINT
+
+/**
+ * Two-stage search: (1) a gemm-based coarse search selects `n_probes` clusters per query;
+ * (2) the interleaved scan kernel collects the top-k candidates from the selected lists and, if
+ * it ran with more than one block per query (`grid_dim_x > 1`), `select_k` merges the results.
+ * Buffer sizes are computed in `size_t`: the products of the 32-bit arguments may overflow.
+ */
+template <typename T, typename AccT, typename IdxT>
+void search_impl(raft::device_resources const& handle,
+                 const raft::neighbors::ivf_flat::index<T, IdxT>& index,
+                 const T* queries,
+                 uint32_t n_queries,
+                 uint32_t k,
+                 uint32_t n_probes,
+                 bool select_min,
+                 IdxT* neighbors,
+                 AccT* distances,
+                 rmm::mr::device_memory_resource* search_mr)
+{
+  auto stream = handle.get_stream();
+  const size_t n_rows       = n_queries;
+  const size_t coarse_size  = n_rows * n_probes;
+  const size_t refined_size = coarse_size * k;
+  // The norm of query
+  rmm::device_uvector<float> query_norm_dev(n_rows, stream, search_mr);
+  // The distance value of cluster(list) and queries
+  rmm::device_uvector<float> distance_buffer_dev(n_rows * index.n_lists(), stream, search_mr);
+  // The topk distance value of cluster(list) and queries
+  rmm::device_uvector<float> coarse_distances_dev(coarse_size, stream, search_mr);
+  // The topk  index of cluster(list) and queries
+  rmm::device_uvector<uint32_t> coarse_indices_dev(coarse_size, stream, search_mr);
+  // The topk distance value of candidate vectors from each cluster(list)
+  rmm::device_uvector<AccT> refined_distances_dev(refined_size, stream, search_mr);
+  // The topk index of candidate vectors from each cluster(list)
+  rmm::device_uvector<IdxT> refined_indices_dev(refined_size, stream, search_mr);
+
+  const size_t float_query_size = std::is_same_v<T, float> ? 0 : n_rows * index.dim();
+  rmm::device_uvector<float> converted_queries_dev(float_query_size, stream, search_mr);
+  float* converted_queries_ptr = converted_queries_dev.data();
+
+  if constexpr (std::is_same_v<T, float>) {
+    converted_queries_ptr = const_cast<float*>(queries);
+  } else {
+    linalg::unaryOp(
+      converted_queries_ptr, queries, float_query_size, utils::mapping<float>{}, stream);
+  }
+
+  // Coefficients of the gemm below: out = alpha * (centers x queries) + beta * out
+  float alpha = 1.0f;
+  float beta  = 0.0f;
+
+  // todo(lsugy): raft distance? (if performance is similar/better than gemm)
+  switch (index.metric()) {
+    case raft::distance::DistanceType::L2Expanded:
+    case raft::distance::DistanceType::L2SqrtExpanded: {
+      alpha = -2.0f;
+      beta  = 1.0f;
+      raft::linalg::rowNorm(query_norm_dev.data(),
+                            converted_queries_ptr,
+                            static_cast<IdxT>(index.dim()),
+                            static_cast<IdxT>(n_queries),
+                            raft::linalg::L2Norm,
+                            true,
+                            stream);
+      utils::outer_add(query_norm_dev.data(),
+                       (IdxT)n_queries,
+                       index.center_norms()->data_handle(),
+                       (IdxT)index.n_lists(),
+                       distance_buffer_dev.data(),
+                       stream);
+      // center_norms is paired with n_lists above, so its trace length is bounded by n_lists
+      RAFT_LOG_TRACE_VEC(index.center_norms()->data_handle(),
+                         std::min<uint32_t>(20, index.n_lists()));
+      RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min<uint32_t>(20, index.n_lists()));
+      break;
+    }
+    default: break;  // other metrics: plain gemm with alpha = 1, beta = 0 (set above)
+  }
+
+  linalg::gemm(handle,
+               true,
+               false,
+               index.n_lists(),
+               n_queries,
+               index.dim(),
+               &alpha,
+               index.centers().data_handle(),
+               index.dim(),
+               converted_queries_ptr,
+               index.dim(),
+               &beta,
+               distance_buffer_dev.data(),
+               index.n_lists(),
+               stream);
+
+  RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min<uint32_t>(20, index.n_lists()));
+  matrix::detail::select_k<AccT, uint32_t>(distance_buffer_dev.data(),
+                                           nullptr,
+                                           n_queries,
+                                           index.n_lists(),
+                                           n_probes,
+                                           coarse_distances_dev.data(),
+                                           coarse_indices_dev.data(),
+                                           select_min,
+                                           stream,
+                                           search_mr);
+  RAFT_LOG_TRACE_VEC(coarse_indices_dev.data(), n_probes);
+  RAFT_LOG_TRACE_VEC(coarse_distances_dev.data(), n_probes);
+
+  uint32_t grid_dim_x = 0;
+  if (n_probes > 1) {
+    // query the gridDimX size to store probes topK output
+    ivfflat_interleaved_scan<T, typename utils::config<T>::value_t, IdxT>(index,
+                                                                          nullptr,
+                                                                          nullptr,
+                                                                          n_queries,
+                                                                          index.metric(),
+                                                                          n_probes,
+                                                                          k,
+                                                                          select_min,
+                                                                          nullptr,
+                                                                          nullptr,
+                                                                          grid_dim_x,
+                                                                          stream);
+  } else {
+    grid_dim_x = 1;
+  }
+
+  // With exactly one block per query, the kernel writes directly into the output buffers.
+  auto distances_dev_ptr = grid_dim_x == 1 ? distances : refined_distances_dev.data();
+  auto indices_dev_ptr   = grid_dim_x == 1 ? neighbors : refined_indices_dev.data();
+
+  ivfflat_interleaved_scan<T, typename utils::config<T>::value_t, IdxT>(index,
+                                                                        queries,
+                                                                        coarse_indices_dev.data(),
+                                                                        n_queries,
+                                                                        index.metric(),
+                                                                        n_probes,
+                                                                        k,
+                                                                        select_min,
+                                                                        indices_dev_ptr,
+                                                                        distances_dev_ptr,
+                                                                        grid_dim_x,
+                                                                        stream);
+  // Trace at most two result rows, but never more elements than the kernel has written.
+  RAFT_LOG_TRACE_VEC(distances_dev_ptr, std::min<size_t>(2 * size_t(k), n_rows * grid_dim_x * k));
+  RAFT_LOG_TRACE_VEC(indices_dev_ptr, std::min<size_t>(2 * size_t(k), n_rows * grid_dim_x * k));
+
+  // Merge topk values from different blocks
+  if (grid_dim_x > 1) {
+    matrix::detail::select_k<AccT, IdxT>(refined_distances_dev.data(),
+                                         refined_indices_dev.data(),
+                                         n_queries,
+                                         k * grid_dim_x,
+                                         k,
+                                         distances,
+                                         neighbors,
+                                         select_min,
+                                         stream,
+                                         search_mr);
+  }
+}
+
+/** See raft::neighbors::ivf_flat::search docs (n_probes is clamped to the number of lists). */
+template <typename T, typename IdxT>
+inline void search(raft::device_resources const& handle,
+                   const search_params& params,
+                   const index<T, IdxT>& index,
+                   const T* queries,
+                   uint32_t n_queries,
+                   uint32_t k,
+                   IdxT* neighbors,
+                   float* distances,
+                   rmm::mr::device_memory_resource* mr = nullptr)
+{
+  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
+    "ivf_flat::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, size_t(index.dim()));
+
+  RAFT_EXPECTS(params.n_probes > 0,
+               "n_probes (number of clusters to probe in the search) must be positive.");
+  auto n_probes = std::min<uint32_t>(params.n_probes, index.n_lists());
+  // Pool size hint, computed in size_t to avoid 32-bit overflow (16: presumably bytes/candidate).
+  auto pool_guard = raft::get_pool_memory_resource(mr, size_t(n_queries) * n_probes * k * 16);
+  if (pool_guard) {
+    RAFT_LOG_DEBUG("ivf_flat::search: using pool memory resource with initial size %zu bytes",
+                   pool_guard->pool_size());
+  }
+
+  return search_impl<T, float, IdxT>(handle,
+                                     index,
+                                     queries,
+                                     n_queries,
+                                     k,
+                                     n_probes,
+                                     raft::distance::is_min_close(index.metric()),
+                                     neighbors,
+                                     distances,
+                                     mr);
+}
+
+}  // namespace raft::neighbors::ivf_flat::detail
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh
index e6533eaf51..7b03ebeab6 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh
@@ -13,1280 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/core/cudart_utils.hpp>
-#include <raft/core/device_resources.hpp>
-#include <raft/core/logger.hpp>
-#include <raft/core/mdarray.hpp>
-#include <raft/core/operators.hpp>
-#include <raft/distance/distance.cuh>
-#include <raft/distance/distance_types.hpp>
-#include <raft/linalg/norm.cuh>
-#include <raft/linalg/unary_op.cuh>
-#include <raft/matrix/detail/select_k.cuh>
-#include <raft/matrix/detail/select_warpsort.cuh>
-#include <raft/neighbors/ivf_flat_types.hpp>
-#include <raft/spatial/knn/detail/ann_utils.cuh>
-#include <raft/util/cuda_utils.cuh>
-#include <raft/util/device_loads_stores.cuh>
-#include <raft/util/integer_utils.hpp>
-#include <raft/util/pow2_utils.cuh>
-#include <raft/util/vectorized.cuh>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/per_device_resource.hpp>
-
-namespace raft::neighbors::ivf_flat::detail {
-
-using namespace raft::spatial::knn::detail;  // NOLINT
-
-constexpr int kThreadsPerBlock = 128;
-
-/**
- * @brief Copy `n` elements per block from one place to another.
- *
- * @param[out] out target pointer (unique per block)
- * @param[in] in source pointer
- * @param n number of elements to copy
- */
-template <int VecBytes = 16, typename T>
-__device__ inline void copy_vectorized(T* out, const T* in, uint32_t n)
-{
-  constexpr int VecElems = VecBytes / sizeof(T);  // NOLINT
-  using align_bytes      = Pow2<(size_t)VecBytes>;
-  if constexpr (VecElems > 1) {
-    using align_elems = Pow2<VecElems>;
-    if (!align_bytes::areSameAlignOffsets(out, in)) {
-      return copy_vectorized<(VecBytes >> 1), T>(out, in, n);
-    }
-    {  // process unaligned head
-      uint32_t head = align_bytes::roundUp(in) - in;
-      if (head > 0) {
-        copy_vectorized<sizeof(T), T>(out, in, head);
-        n -= head;
-        in += head;
-        out += head;
-      }
-    }
-    {  // process main part vectorized
-      using vec_t = typename IOType<T, VecElems>::Type;
-      copy_vectorized<sizeof(vec_t), vec_t>(
-        reinterpret_cast<vec_t*>(out), reinterpret_cast<const vec_t*>(in), align_elems::div(n));
-    }
-    {  // process unaligned tail
-      uint32_t tail = align_elems::mod(n);
-      if (tail > 0) {
-        n -= tail;
-        copy_vectorized<sizeof(T), T>(out + n, in + n, tail);
-      }
-    }
-  }
-  if constexpr (VecElems <= 1) {
-    for (int i = threadIdx.x; i < n; i += blockDim.x) {
-      out[i] = in[i];
-    }
-  }
-}
-
-/**
- * @brief Load a part of a vector from the index and from query, compute the (part of the) distance
- * between them, and aggregate it using the provided Lambda; one structure per thread, per query,
- * and per index item.
- *
- * @tparam kUnroll elements per loop (normally, kUnroll = WarpSize / Veclen)
- * @tparam Lambda computing the part of the distance for one dimension and aggregating it:
- *                void (AccT& acc, AccT x, AccT y)
- * @tparam Veclen size of the vectorized load
- * @tparam T type of the data in the query and the index
- * @tparam AccT type of the accumulated value (an optimization for 8bit values to be loaded as 32bit
- * values)
- */
-template <int kUnroll, typename Lambda, int Veclen, typename T, typename AccT>
-struct loadAndComputeDist {
-  Lambda compute_dist;
-  AccT& dist;
-
-  __device__ __forceinline__ loadAndComputeDist(AccT& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-
-  /**
-   * Load parts of vectors from the index and query and accumulates the partial distance.
-   * This version assumes the query is stored in shared memory.
-   * Every thread here processes exactly kUnroll * Veclen elements independently of others.
-   */
-  template <typename IdxT>
-  __device__ __forceinline__ void runLoadShmemCompute(const T* const& data,
-                                                      const T* query_shared,
-                                                      IdxT loadIndex,
-                                                      IdxT shmemIndex)
-  {
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      T encV[Veclen];
-      ldg(encV, data + (loadIndex + j * kIndexGroupSize) * Veclen);
-      T queryRegs[Veclen];
-      lds(queryRegs, &query_shared[shmemIndex + j * Veclen]);
-#pragma unroll
-      for (int k = 0; k < Veclen; ++k) {
-        compute_dist(dist, queryRegs[k], encV[k]);
-      }
-    }
-  }
-
-  /**
-   * Load parts of vectors from the index and query and accumulates the partial distance.
-   * This version assumes the query is stored in the global memory and is different for every
-   * thread. One warp loads exactly WarpSize query elements at once and then reshuffles them into
-   * corresponding threads (`WarpSize / (kUnroll * Veclen)` elements per thread at once).
-   */
-  template <typename IdxT>
-  __device__ __forceinline__ void runLoadShflAndCompute(const T*& data,
-                                                        const T* query,
-                                                        IdxT baseLoadIndex,
-                                                        const int lane_id)
-  {
-    T queryReg               = query[baseLoadIndex + lane_id];
-    constexpr int stride     = kUnroll * Veclen;
-    constexpr int totalIter  = WarpSize / stride;
-    constexpr int gmemStride = stride * kIndexGroupSize;
-#pragma unroll
-    for (int i = 0; i < totalIter; ++i, data += gmemStride) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        T encV[Veclen];
-        ldg(encV, data + (lane_id + j * kIndexGroupSize) * Veclen);
-        const int d = (i * kUnroll + j) * Veclen;
-#pragma unroll
-        for (int k = 0; k < Veclen; ++k) {
-          compute_dist(dist, shfl(queryReg, d + k, WarpSize), encV[k]);
-        }
-      }
-    }
-  }
-
-  /**
-   * Load parts of vectors from the index and query and accumulates the partial distance.
-   * This version augments `runLoadShflAndCompute` when `dim` is not a multiple of `WarpSize`.
-   */
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
-    const T*& data, const T* query, const int lane_id, const int dim, const int dimBlocks)
-  {
-    const int loadDim     = dimBlocks + lane_id;
-    T queryReg            = loadDim < dim ? query[loadDim] : 0;
-    const int loadDataIdx = lane_id * Veclen;
-    for (int d = 0; d < dim - dimBlocks; d += Veclen, data += kIndexGroupSize * Veclen) {
-      T enc[Veclen];
-      ldg(enc, data + loadDataIdx);
-#pragma unroll
-      for (int k = 0; k < Veclen; k++) {
-        compute_dist(dist, shfl(queryReg, d + k, WarpSize), enc[k]);
-      }
-    }
-  }
-};
-
-// This handles uint8_t 8, 16 Veclens
-template <int kUnroll, typename Lambda, int uint8_veclen>
-struct loadAndComputeDist<kUnroll, Lambda, uint8_veclen, uint8_t, uint32_t> {
-  Lambda compute_dist;
-  uint32_t& dist;
-
-  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-
-  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
-                                                      const uint8_t* query_shared,
-                                                      int loadIndex,
-                                                      int shmemIndex)
-  {
-    constexpr int veclen_int = uint8_veclen / 4;  // converting uint8_t veclens to int
-    loadIndex                = loadIndex * veclen_int;
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      uint32_t encV[veclen_int];
-      ldg(encV,
-          reinterpret_cast<unsigned const*>(data) + loadIndex + j * kIndexGroupSize * veclen_int);
-      uint32_t queryRegs[veclen_int];
-      lds(queryRegs, reinterpret_cast<unsigned const*>(query_shared + shmemIndex) + j * veclen_int);
-#pragma unroll
-      for (int k = 0; k < veclen_int; k++) {
-        compute_dist(dist, queryRegs[k], encV[k]);
-      }
-    }
-  }
-  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
-                                                        const uint8_t* query,
-                                                        int baseLoadIndex,
-                                                        const int lane_id)
-  {
-    constexpr int veclen_int = uint8_veclen / 4;  // converting uint8_t veclens to int
-    uint32_t queryReg =
-      (lane_id < 8) ? reinterpret_cast<unsigned const*>(query + baseLoadIndex)[lane_id] : 0;
-    constexpr int stride = kUnroll * uint8_veclen;
-
-#pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        uint32_t encV[veclen_int];
-        ldg(encV,
-            reinterpret_cast<unsigned const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
-        const int d = (i * kUnroll + j) * veclen_int;
-#pragma unroll
-        for (int k = 0; k < veclen_int; ++k) {
-          compute_dist(dist, shfl(queryReg, d + k, WarpSize), encV[k]);
-        }
-      }
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
-                                                                 const uint8_t* query,
-                                                                 const int lane_id,
-                                                                 const int dim,
-                                                                 const int dimBlocks)
-  {
-    constexpr int veclen_int = uint8_veclen / 4;
-    const int loadDim        = dimBlocks + lane_id * 4;  // Here 4 is for 1 - int
-    uint32_t queryReg = loadDim < dim ? reinterpret_cast<uint32_t const*>(query + loadDim)[0] : 0;
-    for (int d = 0; d < dim - dimBlocks;
-         d += uint8_veclen, data += kIndexGroupSize * uint8_veclen) {
-      uint32_t enc[veclen_int];
-      ldg(enc, reinterpret_cast<uint32_t const*>(data) + lane_id * veclen_int);
-#pragma unroll
-      for (int k = 0; k < veclen_int; k++) {
-        uint32_t q = shfl(queryReg, (d / 4) + k, WarpSize);
-        compute_dist(dist, q, enc[k]);
-      }
-    }
-  }
-};
-
-// Keep this specialized uint8 Veclen = 4, because compiler is generating suboptimal code while
-// using above common template of int2/int4
-template <int kUnroll, typename Lambda>
-struct loadAndComputeDist<kUnroll, Lambda, 4, uint8_t, uint32_t> {
-  Lambda compute_dist;
-  uint32_t& dist;
-
-  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-
-  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
-                                                      const uint8_t* query_shared,
-                                                      int loadIndex,
-                                                      int shmemIndex)
-  {
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      uint32_t encV      = reinterpret_cast<unsigned const*>(data)[loadIndex + j * kIndexGroupSize];
-      uint32_t queryRegs = reinterpret_cast<unsigned const*>(query_shared + shmemIndex)[j];
-      compute_dist(dist, queryRegs, encV);
-    }
-  }
-  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
-                                                        const uint8_t* query,
-                                                        int baseLoadIndex,
-                                                        const int lane_id)
-  {
-    uint32_t queryReg =
-      (lane_id < 8) ? reinterpret_cast<unsigned const*>(query + baseLoadIndex)[lane_id] : 0;
-    constexpr int veclen = 4;
-    constexpr int stride = kUnroll * veclen;
-
-#pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        uint32_t encV = reinterpret_cast<unsigned const*>(data)[lane_id + j * kIndexGroupSize];
-        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
-        compute_dist(dist, q, encV);
-      }
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
-                                                                 const uint8_t* query,
-                                                                 const int lane_id,
-                                                                 const int dim,
-                                                                 const int dimBlocks)
-  {
-    constexpr int veclen = 4;
-    const int loadDim    = dimBlocks + lane_id;
-    uint32_t queryReg    = loadDim < dim ? reinterpret_cast<unsigned const*>(query)[loadDim] : 0;
-    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
-      uint32_t enc = reinterpret_cast<unsigned const*>(data)[lane_id];
-      uint32_t q   = shfl(queryReg, d / veclen, WarpSize);
-      compute_dist(dist, q, enc);
-    }
-  }
-};
-
-template <int kUnroll, typename Lambda>
-struct loadAndComputeDist<kUnroll, Lambda, 2, uint8_t, uint32_t> {
-  Lambda compute_dist;
-  uint32_t& dist;
-
-  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-
-  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
-                                                      const uint8_t* query_shared,
-                                                      int loadIndex,
-                                                      int shmemIndex)
-  {
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      uint32_t encV      = reinterpret_cast<uint16_t const*>(data)[loadIndex + j * kIndexGroupSize];
-      uint32_t queryRegs = reinterpret_cast<uint16_t const*>(query_shared + shmemIndex)[j];
-      compute_dist(dist, queryRegs, encV);
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
-                                                        const uint8_t* query,
-                                                        int baseLoadIndex,
-                                                        const int lane_id)
-  {
-    uint32_t queryReg =
-      (lane_id < 16) ? reinterpret_cast<uint16_t const*>(query + baseLoadIndex)[lane_id] : 0;
-    constexpr int veclen = 2;
-    constexpr int stride = kUnroll * veclen;
-
-#pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        uint32_t encV = reinterpret_cast<uint16_t const*>(data)[lane_id + j * kIndexGroupSize];
-        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
-        compute_dist(dist, q, encV);
-      }
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
-                                                                 const uint8_t* query,
-                                                                 const int lane_id,
-                                                                 const int dim,
-                                                                 const int dimBlocks)
-  {
-    constexpr int veclen = 2;
-    int loadDim          = dimBlocks + lane_id * veclen;
-    uint32_t queryReg = loadDim < dim ? reinterpret_cast<uint16_t const*>(query + loadDim)[0] : 0;
-    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
-      uint32_t enc = reinterpret_cast<uint16_t const*>(data)[lane_id];
-      uint32_t q   = shfl(queryReg, d / veclen, WarpSize);
-      compute_dist(dist, q, enc);
-    }
-  }
-};
-
-template <int kUnroll, typename Lambda>
-struct loadAndComputeDist<kUnroll, Lambda, 1, uint8_t, uint32_t> {
-  Lambda compute_dist;
-  uint32_t& dist;
-
-  __device__ __forceinline__ loadAndComputeDist(uint32_t& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-
-  __device__ __forceinline__ void runLoadShmemCompute(const uint8_t* const& data,
-                                                      const uint8_t* query_shared,
-                                                      int loadIndex,
-                                                      int shmemIndex)
-  {
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      uint32_t encV      = data[loadIndex + j * kIndexGroupSize];
-      uint32_t queryRegs = query_shared[shmemIndex + j];
-      compute_dist(dist, queryRegs, encV);
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndCompute(const uint8_t*& data,
-                                                        const uint8_t* query,
-                                                        int baseLoadIndex,
-                                                        const int lane_id)
-  {
-    uint32_t queryReg    = query[baseLoadIndex + lane_id];
-    constexpr int veclen = 1;
-    constexpr int stride = kUnroll * veclen;
-
-#pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        uint32_t encV = data[lane_id + j * kIndexGroupSize];
-        uint32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
-        compute_dist(dist, q, encV);
-      }
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(const uint8_t*& data,
-                                                                 const uint8_t* query,
-                                                                 const int lane_id,
-                                                                 const int dim,
-                                                                 const int dimBlocks)
-  {
-    constexpr int veclen = 1;
-    int loadDim          = dimBlocks + lane_id;
-    uint32_t queryReg    = loadDim < dim ? query[loadDim] : 0;
-    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
-      uint32_t enc = data[lane_id];
-      uint32_t q   = shfl(queryReg, d, WarpSize);
-      compute_dist(dist, q, enc);
-    }
-  }
-};
-
-// This device function is for int8 veclens 4, 8 and 16
-template <int kUnroll, typename Lambda, int int8_veclen>
-struct loadAndComputeDist<kUnroll, Lambda, int8_veclen, int8_t, int32_t> {
-  Lambda compute_dist;
-  int32_t& dist;
-
-  __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-
-  __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data,
-                                                      const int8_t* query_shared,
-                                                      int loadIndex,
-                                                      int shmemIndex)
-  {
-    constexpr int veclen_int = int8_veclen / 4;  // converting int8_t veclens to int
-
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      int32_t encV[veclen_int];
-      ldg(encV,
-          reinterpret_cast<int32_t const*>(data) + (loadIndex + j * kIndexGroupSize) * veclen_int);
-      int32_t queryRegs[veclen_int];
-      lds(queryRegs, reinterpret_cast<int32_t const*>(query_shared + shmemIndex) + j * veclen_int);
-#pragma unroll
-      for (int k = 0; k < veclen_int; k++) {
-        compute_dist(dist, queryRegs[k], encV[k]);
-      }
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data,
-                                                        const int8_t* query,
-                                                        int baseLoadIndex,
-                                                        const int lane_id)
-  {
-    constexpr int veclen_int = int8_veclen / 4;  // converting int8_t veclens to int
-
-    int32_t queryReg =
-      (lane_id < 8) ? reinterpret_cast<int32_t const*>(query + baseLoadIndex)[lane_id] : 0;
-    constexpr int stride = kUnroll * int8_veclen;
-
-#pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        int32_t encV[veclen_int];
-        ldg(encV,
-            reinterpret_cast<int32_t const*>(data) + (lane_id + j * kIndexGroupSize) * veclen_int);
-        const int d = (i * kUnroll + j) * veclen_int;
-#pragma unroll
-        for (int k = 0; k < veclen_int; ++k) {
-          int32_t q = shfl(queryReg, d + k, WarpSize);
-          compute_dist(dist, q, encV[k]);
-        }
-      }
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
-    const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks)
-  {
-    constexpr int veclen_int = int8_veclen / 4;
-    const int loadDim        = dimBlocks + lane_id * 4;  // Here 4 is for 1 - int;
-    int32_t queryReg = loadDim < dim ? reinterpret_cast<int32_t const*>(query + loadDim)[0] : 0;
-    for (int d = 0; d < dim - dimBlocks; d += int8_veclen, data += kIndexGroupSize * int8_veclen) {
-      int32_t enc[veclen_int];
-      ldg(enc, reinterpret_cast<int32_t const*>(data) + lane_id * veclen_int);
-#pragma unroll
-      for (int k = 0; k < veclen_int; k++) {
-        int32_t q = shfl(queryReg, (d / 4) + k, WarpSize);  // Here 4 is for 1 - int;
-        compute_dist(dist, q, enc[k]);
-      }
-    }
-  }
-};
-
-template <int kUnroll, typename Lambda>
-struct loadAndComputeDist<kUnroll, Lambda, 2, int8_t, int32_t> {
-  Lambda compute_dist;
-  int32_t& dist;
-  __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-  __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data,
-                                                      const int8_t* query_shared,
-                                                      int loadIndex,
-                                                      int shmemIndex)
-  {
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      int32_t encV      = reinterpret_cast<uint16_t const*>(data)[loadIndex + j * kIndexGroupSize];
-      int32_t queryRegs = reinterpret_cast<uint16_t const*>(query_shared + shmemIndex)[j];
-      compute_dist(dist, queryRegs, encV);
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data,
-                                                        const int8_t* query,
-                                                        int baseLoadIndex,
-                                                        const int lane_id)
-  {
-    int32_t queryReg =
-      (lane_id < 16) ? reinterpret_cast<uint16_t const*>(query + baseLoadIndex)[lane_id] : 0;
-    constexpr int veclen = 2;
-    constexpr int stride = kUnroll * veclen;
-
-#pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        int32_t encV = reinterpret_cast<uint16_t const*>(data)[lane_id + j * kIndexGroupSize];
-        int32_t q    = shfl(queryReg, i * kUnroll + j, WarpSize);
-        compute_dist(dist, q, encV);
-      }
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
-    const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks)
-  {
-    constexpr int veclen = 2;
-    int loadDim          = dimBlocks + lane_id * veclen;
-    int32_t queryReg = loadDim < dim ? reinterpret_cast<uint16_t const*>(query + loadDim)[0] : 0;
-    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
-      int32_t enc = reinterpret_cast<uint16_t const*>(data + lane_id * veclen)[0];
-      int32_t q   = shfl(queryReg, d / veclen, WarpSize);
-      compute_dist(dist, q, enc);
-    }
-  }
-};
-
-template <int kUnroll, typename Lambda>
-struct loadAndComputeDist<kUnroll, Lambda, 1, int8_t, int32_t> {
-  Lambda compute_dist;
-  int32_t& dist;
-  __device__ __forceinline__ loadAndComputeDist(int32_t& dist, Lambda op)
-    : dist(dist), compute_dist(op)
-  {
-  }
-
-  __device__ __forceinline__ void runLoadShmemCompute(const int8_t* const& data,
-                                                      const int8_t* query_shared,
-                                                      int loadIndex,
-                                                      int shmemIndex)
-  {
-#pragma unroll
-    for (int j = 0; j < kUnroll; ++j) {
-      compute_dist(dist, query_shared[shmemIndex + j], data[loadIndex + j * kIndexGroupSize]);
-    }
-  }
-
-  __device__ __forceinline__ void runLoadShflAndCompute(const int8_t*& data,
-                                                        const int8_t* query,
-                                                        int baseLoadIndex,
-                                                        const int lane_id)
-  {
-    constexpr int veclen = 1;
-    constexpr int stride = kUnroll * veclen;
-    int32_t queryReg     = query[baseLoadIndex + lane_id];
-
-#pragma unroll
-    for (int i = 0; i < WarpSize / stride; ++i, data += stride * kIndexGroupSize) {
-#pragma unroll
-      for (int j = 0; j < kUnroll; ++j) {
-        compute_dist(
-          dist, shfl(queryReg, i * kUnroll + j, WarpSize), data[lane_id + j * kIndexGroupSize]);
-      }
-    }
-  }
-  __device__ __forceinline__ void runLoadShflAndComputeRemainder(
-    const int8_t*& data, const int8_t* query, const int lane_id, const int dim, const int dimBlocks)
-  {
-    constexpr int veclen = 1;
-    const int loadDim    = dimBlocks + lane_id;
-    int32_t queryReg     = loadDim < dim ? query[loadDim] : 0;
-    for (int d = 0; d < dim - dimBlocks; d += veclen, data += kIndexGroupSize * veclen) {
-      compute_dist(dist, shfl(queryReg, d, WarpSize), data[lane_id]);
-    }
-  }
-};
-
-/**
- * Scan clusters for nearest neighbors of the query vectors.
- * See `ivfflat_interleaved_scan` for more information.
- *
- * The clusters are stored in the interleaved index format described in ivf_flat_types.hpp.
- * For each query vector, a set of clusters is probed: the distance to each vector in the cluster is
- * calculated, and the top-k nearest neighbors are selected.
- *
- * @param compute_dist distance function
- * @param query_smem_elems number of dimensions of the query vector to fit in a shared memory of a
- * block; this number must be a multiple of `WarpSize * Veclen`.
- * @param[in] query a pointer to all queries in a row-major contiguous format [gridDim.y, dim]
- * @param[in] coarse_index a pointer to the cluster indices to search through [n_probes]
- * @param[in] list_indices index<T, IdxT>.indices
- * @param[in] list_data index<T, IdxT>.data
- * @param[in] list_sizes index<T, IdxT>.list_sizes
- * @param[in] list_offsets index<T, IdxT>.list_offsets
- * @param n_probes
- * @param k
- * @param dim
- * @param[out] neighbors
- * @param[out] distances
- */
-template <int Capacity,
-          int Veclen,
-          bool Ascending,
-          typename T,
-          typename AccT,
-          typename IdxT,
-          typename Lambda,
-          typename PostLambda>
-__global__ void __launch_bounds__(kThreadsPerBlock)
-  interleaved_scan_kernel(Lambda compute_dist,
-                          PostLambda post_process,
-                          const uint32_t query_smem_elems,
-                          const T* query,
-                          const uint32_t* coarse_index,
-                          const IdxT* const* list_indices_ptrs,
-                          const T* const* list_data_ptrs,
-                          const uint32_t* list_sizes,
-                          const uint32_t n_probes,
-                          const uint32_t k,
-                          const uint32_t dim,
-                          IdxT* neighbors,
-                          float* distances)
-{
-  extern __shared__ __align__(256) uint8_t interleaved_scan_kernel_smem[];
-  // Using shared memory for the (part of the) query;
-  // This allows to save on global memory bandwidth when reading index and query
-  // data at the same time.
-  // Its size is `query_smem_elems`.
-  T* query_shared = reinterpret_cast<T*>(interleaved_scan_kernel_smem);
-  // Make the query input and output point to this block's shared query
-  {
-    const int query_id = blockIdx.y;
-    query += query_id * dim;
-    neighbors += query_id * k * gridDim.x + blockIdx.x * k;
-    distances += query_id * k * gridDim.x + blockIdx.x * k;
-    coarse_index += query_id * n_probes;
-  }
-
-  // Copy a part of the query into shared memory for faster processing
-  copy_vectorized(query_shared, query, std::min(dim, query_smem_elems));
-  __syncthreads();
-
-  using block_sort_t = matrix::detail::select::warpsort::block_sort<
-    matrix::detail::select::warpsort::warp_sort_filtered,
-    Capacity,
-    Ascending,
-    float,
-    IdxT>;
-  block_sort_t queue(k);
-
-  {
-    using align_warp  = Pow2<WarpSize>;
-    const int lane_id = align_warp::mod(threadIdx.x);
-
-    // How many full warps needed to compute the distance (without remainder)
-    const uint32_t full_warps_along_dim = align_warp::roundDown(dim);
-
-    const uint32_t shm_assisted_dim =
-      (dim > query_smem_elems) ? query_smem_elems : full_warps_along_dim;
-
-    // Every CUDA block scans one cluster at a time.
-    for (int probe_id = blockIdx.x; probe_id < n_probes; probe_id += gridDim.x) {
-      const uint32_t list_id = coarse_index[probe_id];  // The id of cluster(list)
-
-      // The number of vectors in each cluster(list); [nlist]
-      const uint32_t list_length = list_sizes[list_id];
-
-      // The number of interleaved groups to be processed
-      const uint32_t num_groups =
-        align_warp::div(list_length + align_warp::Mask);  // ceildiv by power of 2
-
-      constexpr int kUnroll        = WarpSize / Veclen;
-      constexpr uint32_t kNumWarps = kThreadsPerBlock / WarpSize;
-      // Every warp reads WarpSize vectors and computes the distances to them.
-      // Then, the distances and corresponding ids are distributed among the threads,
-      // and each thread adds one (id, dist) pair to the filtering queue.
-      for (uint32_t group_id = align_warp::div(threadIdx.x); group_id < num_groups;
-           group_id += kNumWarps) {
-        AccT dist = 0;
-        // This is where this warp begins reading data (start position of an interleaved group)
-        const T* data = list_data_ptrs[list_id] + (group_id * kIndexGroupSize) * dim;
-
-        // This is the vector a given lane/thread handles
-        const uint32_t vec_id = group_id * WarpSize + lane_id;
-        const bool valid      = vec_id < list_length;
-
-        // Process first shm_assisted_dim dimensions (always using shared memory)
-        if (valid) {
-          loadAndComputeDist<kUnroll, decltype(compute_dist), Veclen, T, AccT> lc(dist,
-                                                                                  compute_dist);
-          for (int pos = 0; pos < shm_assisted_dim;
-               pos += WarpSize, data += kIndexGroupSize * WarpSize) {
-            lc.runLoadShmemCompute(data, query_shared, lane_id, pos);
-          }
-        }
-
-        if (dim > query_smem_elems) {
-          // The default path - using shfl ops - for dimensions beyond query_smem_elems
-          loadAndComputeDist<kUnroll, decltype(compute_dist), Veclen, T, AccT> lc(dist,
-                                                                                  compute_dist);
-          for (int pos = shm_assisted_dim; pos < full_warps_along_dim; pos += WarpSize) {
-            lc.runLoadShflAndCompute(data, query, pos, lane_id);
-          }
-          lc.runLoadShflAndComputeRemainder(data, query, lane_id, dim, full_warps_along_dim);
-        } else {
-          // when  shm_assisted_dim == full_warps_along_dim < dim
-          if (valid) {
-            loadAndComputeDist<1, decltype(compute_dist), Veclen, T, AccT> lc(dist, compute_dist);
-            for (int pos = full_warps_along_dim; pos < dim;
-                 pos += Veclen, data += kIndexGroupSize * Veclen) {
-              lc.runLoadShmemCompute(data, query_shared, lane_id, pos);
-            }
-          }
-        }
-
-        // Enqueue one element per thread
-        const float val  = valid ? static_cast<float>(dist) : block_sort_t::queue_t::kDummy;
-        const size_t idx = valid ? static_cast<size_t>(list_indices_ptrs[list_id][vec_id]) : 0;
-        queue.add(val, idx);
-      }
-    }
-  }
-
-  // finalize and store selected neighbours
-  __syncthreads();
-  queue.done(interleaved_scan_kernel_smem);
-  queue.store(distances, neighbors, post_process);
-}
-
-/**
- *  Configure the gridDim.x to maximize GPU occupancy, but reduce the output size
- */
-template <typename T>
-uint32_t configure_launch_x(uint32_t numQueries, uint32_t n_probes, int32_t sMemSize, T func)
-{
-  int dev_id;
-  RAFT_CUDA_TRY(cudaGetDevice(&dev_id));
-  int num_sms;
-  RAFT_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
-  int num_blocks_per_sm = 0;
-  RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
-    &num_blocks_per_sm, func, kThreadsPerBlock, sMemSize));
-
-  size_t min_grid_size = num_sms * num_blocks_per_sm;
-  size_t min_grid_x    = ceildiv<size_t>(min_grid_size, numQueries);
-  return min_grid_x > n_probes ? n_probes : static_cast<uint32_t>(min_grid_x);
-}
-
-template <int Capacity,
-          int Veclen,
-          bool Ascending,
-          typename T,
-          typename AccT,
-          typename IdxT,
-          typename Lambda,
-          typename PostLambda>
-void launch_kernel(Lambda lambda,
-                   PostLambda post_process,
-                   const index<T, IdxT>& index,
-                   const T* queries,
-                   const uint32_t* coarse_index,
-                   const uint32_t num_queries,
-                   const uint32_t n_probes,
-                   const uint32_t k,
-                   IdxT* neighbors,
-                   float* distances,
-                   uint32_t& grid_dim_x,
-                   rmm::cuda_stream_view stream)
-{
-  RAFT_EXPECTS(Veclen == index.veclen(),
-               "Configured Veclen does not match the index interleaving pattern.");
-  constexpr auto kKernel =
-    interleaved_scan_kernel<Capacity, Veclen, Ascending, T, AccT, IdxT, Lambda, PostLambda>;
-  const int max_query_smem = 16384;
-  int query_smem_elems =
-    std::min<int>(max_query_smem / sizeof(T), Pow2<Veclen * WarpSize>::roundUp(index.dim()));
-  int smem_size              = query_smem_elems * sizeof(T);
-  constexpr int kSubwarpSize = std::min<int>(Capacity, WarpSize);
-  auto block_merge_mem =
-    raft::matrix::detail::select::warpsort::calc_smem_size_for_block_wide<AccT, IdxT>(
-      kThreadsPerBlock / kSubwarpSize, k);
-  smem_size += std::max<int>(smem_size, block_merge_mem);
-
-  // power-of-two less than cuda limit (for better addr alignment)
-  constexpr uint32_t kMaxGridY = 32768;
-
-  if (grid_dim_x == 0) {
-    grid_dim_x = configure_launch_x(std::min(kMaxGridY, num_queries), n_probes, smem_size, kKernel);
-    return;
-  }
-
-  for (uint32_t query_offset = 0; query_offset < num_queries; query_offset += kMaxGridY) {
-    uint32_t grid_dim_y = std::min<uint32_t>(kMaxGridY, num_queries - query_offset);
-    dim3 grid_dim(grid_dim_x, grid_dim_y, 1);
-    dim3 block_dim(kThreadsPerBlock);
-    RAFT_LOG_TRACE(
-      "Launching the ivf-flat interleaved_scan_kernel (%d, %d, 1) x (%d, 1, 1), n_probes = %d, "
-      "smem_size = %d",
-      grid_dim.x,
-      grid_dim.y,
-      block_dim.x,
-      n_probes,
-      smem_size);
-    kKernel<<<grid_dim, block_dim, smem_size, stream>>>(lambda,
-                                                        post_process,
-                                                        query_smem_elems,
-                                                        queries,
-                                                        coarse_index,
-                                                        index.inds_ptrs().data_handle(),
-                                                        index.data_ptrs().data_handle(),
-                                                        index.list_sizes().data_handle(),
-                                                        n_probes,
-                                                        k,
-                                                        index.dim(),
-                                                        neighbors,
-                                                        distances);
-    queries += grid_dim_y * index.dim();
-    neighbors += grid_dim_y * grid_dim_x * k;
-    distances += grid_dim_y * grid_dim_x * k;
-  }
-}
-
-template <int Veclen, typename T, typename AccT>
-struct euclidean_dist {
-  __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y)
-  {
-    const auto diff = x - y;
-    acc += diff * diff;
-  }
-};
-
-template <int Veclen>
-struct euclidean_dist<Veclen, uint8_t, uint32_t> {
-  __device__ __forceinline__ void operator()(uint32_t& acc, uint32_t x, uint32_t y)
-  {
-    if constexpr (Veclen > 1) {
-      const auto diff = __vabsdiffu4(x, y);
-      acc             = dp4a(diff, diff, acc);
-    } else {
-      const auto diff = __usad(x, y, 0u);
-      acc += diff * diff;
-    }
-  }
-};
-
-template <int Veclen>
-struct euclidean_dist<Veclen, int8_t, int32_t> {
-  __device__ __forceinline__ void operator()(int32_t& acc, int32_t x, int32_t y)
-  {
-    if constexpr (Veclen > 1) {
-      // Note that we enforce here that the unsigned version of dp4a is used, because the difference
-      // between two int8 numbers can be greater than 127 and therefore represented as a negative
-      // number in int8. Casting from int8 to int32 would yield incorrect results, while casting
-      // from uint8 to uint32 is correct.
-      const auto diff = __vabsdiffs4(x, y);
-      acc             = dp4a(diff, diff, static_cast<uint32_t>(acc));
-    } else {
-      const auto diff = x - y;
-      acc += diff * diff;
-    }
-  }
-};
-
-template <int Veclen, typename T, typename AccT>
-struct inner_prod_dist {
-  __device__ __forceinline__ void operator()(AccT& acc, AccT x, AccT y)
-  {
-    if constexpr (Veclen > 1 && (std::is_same_v<T, int8_t> || std::is_same_v<T, uint8_t>)) {
-      acc = dp4a(x, y, acc);
-    } else {
-      acc += x * y;
-    }
-  }
-};
-
-/** Select the distance computation function and forward the rest of the arguments. */
-template <int Capacity,
-          int Veclen,
-          bool Ascending,
-          typename T,
-          typename AccT,
-          typename IdxT,
-          typename... Args>
-void launch_with_fixed_consts(raft::distance::DistanceType metric, Args&&... args)
-{
-  switch (metric) {
-    case raft::distance::DistanceType::L2Expanded:
-    case raft::distance::DistanceType::L2Unexpanded:
-      return launch_kernel<Capacity,
-                           Veclen,
-                           Ascending,
-                           T,
-                           AccT,
-                           IdxT,
-                           euclidean_dist<Veclen, T, AccT>,
-                           raft::identity_op>({}, {}, std::forward<Args>(args)...);
-    case raft::distance::DistanceType::L2SqrtExpanded:
-    case raft::distance::DistanceType::L2SqrtUnexpanded:
-      return launch_kernel<Capacity,
-                           Veclen,
-                           Ascending,
-                           T,
-                           AccT,
-                           IdxT,
-                           euclidean_dist<Veclen, T, AccT>,
-                           raft::sqrt_op>({}, {}, std::forward<Args>(args)...);
-    case raft::distance::DistanceType::InnerProduct:
-      return launch_kernel<Capacity,
-                           Veclen,
-                           Ascending,
-                           T,
-                           AccT,
-                           IdxT,
-                           inner_prod_dist<Veclen, T, AccT>,
-                           raft::identity_op>({}, {}, std::forward<Args>(args)...);
-    // NB: update the description of `knn::ivf_flat::build` when adding here a new metric.
-    default: RAFT_FAIL("The chosen distance metric is not supported (%d)", int(metric));
-  }
-}
-
-/**
- * Lift the `capacity` and `veclen` parameters to the template level,
- * forward the rest of the arguments unmodified to `launch_interleaved_scan_kernel`.
- */
-template <typename T,
-          typename AccT,
-          typename IdxT,
-          int Capacity = matrix::detail::select::warpsort::kMaxCapacity,
-          int Veclen   = std::max<int>(1, 16 / sizeof(T))>
-struct select_interleaved_scan_kernel {
-  /**
-   * Recursively reduce the `Capacity` and `Veclen` parameters until they match the
-   * corresponding runtime arguments.
-   * By default, this recursive process starts with maximum possible values of the
-   * two parameters and ends with both values equal to 1.
-   */
-  template <typename... Args>
-  static inline void run(int capacity, int veclen, bool select_min, Args&&... args)
-  {
-    if constexpr (Capacity > 1) {
-      if (capacity * 2 <= Capacity) {
-        return select_interleaved_scan_kernel<T, AccT, IdxT, Capacity / 2, Veclen>::run(
-          capacity, veclen, select_min, std::forward<Args>(args)...);
-      }
-    }
-    if constexpr (Veclen > 1) {
-      if (veclen * 2 <= Veclen) {
-        return select_interleaved_scan_kernel<T, AccT, IdxT, Capacity, Veclen / 2>::run(
-          capacity, veclen, select_min, std::forward<Args>(args)...);
-      }
-    }
-    // NB: this is the limitation of the warpsort structures that use a huge number of
-    //     registers (used in the main kernel here).
-    RAFT_EXPECTS(capacity == Capacity,
-                 "Capacity must be power-of-two not bigger than the maximum allowed size "
-                 "matrix::detail::select::warpsort::kMaxCapacity (%d).",
-                 matrix::detail::select::warpsort::kMaxCapacity);
-    RAFT_EXPECTS(
-      veclen == Veclen,
-      "Veclen must be power-of-two not bigger than the maximum allowed size for this data type.");
-    if (select_min) {
-      launch_with_fixed_consts<Capacity, Veclen, true, T, AccT, IdxT>(std::forward<Args>(args)...);
-    } else {
-      launch_with_fixed_consts<Capacity, Veclen, false, T, AccT, IdxT>(std::forward<Args>(args)...);
-    }
-  }
-};
-
-/**
- * @brief Configure and launch an appropriate template instance of the interleaved scan kernel.
- *
- * @tparam T value type
- * @tparam AccT accumulated type
- * @tparam IdxT type of the indices
- *
- * @param index previously built ivf-flat index
- * @param[in] queries device pointer to the query vectors [batch_size, dim]
- * @param[in] coarse_query_results device pointer to the cluster (list) ids [batch_size, n_probes]
- * @param n_queries batch size
- * @param metric type of the measured distance
- * @param n_probes number of nearest clusters to query
- * @param k number of nearest neighbors.
- *            NB: the maximum value of `k` is limited statically by `kMaxCapacity`.
- * @param select_min whether to select nearest (true) or furthest (false) points w.r.t. the given
- * metric.
- * @param[out] neighbors device pointer to the result indices for each query and cluster
- * [batch_size, grid_dim_x, k]
- * @param[out] distances device pointer to the result distances for each query and cluster
- * [batch_size, grid_dim_x, k]
- * @param[inout] grid_dim_x number of blocks launched across all n_probes clusters;
- *               (one block processes one or more probes, hence: 1 <= grid_dim_x <= n_probes)
- * @param stream
- */
-template <typename T, typename AccT, typename IdxT>
-void ivfflat_interleaved_scan(const index<T, IdxT>& index,
-                              const T* queries,
-                              const uint32_t* coarse_query_results,
-                              const uint32_t n_queries,
-                              const raft::distance::DistanceType metric,
-                              const uint32_t n_probes,
-                              const uint32_t k,
-                              const bool select_min,
-                              IdxT* neighbors,
-                              float* distances,
-                              uint32_t& grid_dim_x,
-                              rmm::cuda_stream_view stream)
-{
-  // greppable-id-specializations-ivf-flat-search: The ivfflat_interleaved_scan
-  // function is used in both raft::neighbors::ivf_flat::search and
-  // raft::neighbors::detail::refine_device. To prevent a duplicate
-  // instantiation of this function (which defines ~270 kernels) in the refine
-  // specializations, an extern template definition is provided. Please check
-  // related function calls after editing this function definition. Search for
-  // `greppable-id-specializations-ivf-flat-search` to find them.
-
-  const int capacity = bound_by_power_of_two(k);
-  select_interleaved_scan_kernel<T, AccT, IdxT>::run(capacity,
-                                                     index.veclen(),
-                                                     select_min,
-                                                     metric,
-                                                     index,
-                                                     queries,
-                                                     coarse_query_results,
-                                                     n_queries,
-                                                     n_probes,
-                                                     k,
-                                                     neighbors,
-                                                     distances,
-                                                     grid_dim_x,
-                                                     stream);
-}
-
-template <typename T, typename AccT, typename IdxT>
-void search_impl(raft::device_resources const& handle,
-                 const index<T, IdxT>& index,
-                 const T* queries,
-                 uint32_t n_queries,
-                 uint32_t k,
-                 uint32_t n_probes,
-                 bool select_min,
-                 IdxT* neighbors,
-                 AccT* distances,
-                 rmm::mr::device_memory_resource* search_mr)
-{
-  auto stream = handle.get_stream();
-  // The norm of query
-  rmm::device_uvector<float> query_norm_dev(n_queries, stream, search_mr);
-  // The distance value of cluster(list) and queries
-  rmm::device_uvector<float> distance_buffer_dev(n_queries * index.n_lists(), stream, search_mr);
-  // The topk distance value of cluster(list) and queries
-  rmm::device_uvector<float> coarse_distances_dev(n_queries * n_probes, stream, search_mr);
-  // The topk  index of cluster(list) and queries
-  rmm::device_uvector<uint32_t> coarse_indices_dev(n_queries * n_probes, stream, search_mr);
-  // The topk distance value of candidate vectors from each cluster(list)
-  rmm::device_uvector<AccT> refined_distances_dev(n_queries * n_probes * k, stream, search_mr);
-  // The topk index of candidate vectors from each cluster(list)
-  rmm::device_uvector<IdxT> refined_indices_dev(n_queries * n_probes * k, stream, search_mr);
-
-  size_t float_query_size;
-  if constexpr (std::is_integral_v<T>) {
-    float_query_size = n_queries * index.dim();
-  } else {
-    float_query_size = 0;
-  }
-  rmm::device_uvector<float> converted_queries_dev(float_query_size, stream, search_mr);
-  float* converted_queries_ptr = converted_queries_dev.data();
-
-  if constexpr (std::is_same_v<T, float>) {
-    converted_queries_ptr = const_cast<float*>(queries);
-  } else {
-    linalg::unaryOp(
-      converted_queries_ptr, queries, n_queries * index.dim(), utils::mapping<float>{}, stream);
-  }
-
-  float alpha = 1.0f;
-  float beta  = 0.0f;
-
-  // todo(lsugy): raft distance? (if performance is similar/better than gemm)
-  switch (index.metric()) {
-    case raft::distance::DistanceType::L2Expanded:
-    case raft::distance::DistanceType::L2SqrtExpanded: {
-      alpha = -2.0f;
-      beta  = 1.0f;
-      raft::linalg::rowNorm(query_norm_dev.data(),
-                            converted_queries_ptr,
-                            static_cast<IdxT>(index.dim()),
-                            static_cast<IdxT>(n_queries),
-                            raft::linalg::L2Norm,
-                            true,
-                            stream);
-      utils::outer_add(query_norm_dev.data(),
-                       (IdxT)n_queries,
-                       index.center_norms()->data_handle(),
-                       (IdxT)index.n_lists(),
-                       distance_buffer_dev.data(),
-                       stream);
-      RAFT_LOG_TRACE_VEC(index.center_norms()->data_handle(), std::min<uint32_t>(20, index.dim()));
-      RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min<uint32_t>(20, index.n_lists()));
-      break;
-    }
-    default: {
-      alpha = 1.0f;
-      beta  = 0.0f;
-    }
-  }
-
-  linalg::gemm(handle,
-               true,
-               false,
-               index.n_lists(),
-               n_queries,
-               index.dim(),
-               &alpha,
-               index.centers().data_handle(),
-               index.dim(),
-               converted_queries_ptr,
-               index.dim(),
-               &beta,
-               distance_buffer_dev.data(),
-               index.n_lists(),
-               stream);
-
-  RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min<uint32_t>(20, index.n_lists()));
-  matrix::detail::select_k<AccT, uint32_t>(distance_buffer_dev.data(),
-                                           nullptr,
-                                           n_queries,
-                                           index.n_lists(),
-                                           n_probes,
-                                           coarse_distances_dev.data(),
-                                           coarse_indices_dev.data(),
-                                           select_min,
-                                           stream,
-                                           search_mr);
-  RAFT_LOG_TRACE_VEC(coarse_indices_dev.data(), n_probes);
-  RAFT_LOG_TRACE_VEC(coarse_distances_dev.data(), n_probes);
-
-  auto distances_dev_ptr = refined_distances_dev.data();
-  auto indices_dev_ptr   = refined_indices_dev.data();
-
-  uint32_t grid_dim_x = 0;
-  if (n_probes > 1) {
-    // query the gridDimX size to store probes topK output
-    ivfflat_interleaved_scan<T, typename utils::config<T>::value_t, IdxT>(index,
-                                                                          nullptr,
-                                                                          nullptr,
-                                                                          n_queries,
-                                                                          index.metric(),
-                                                                          n_probes,
-                                                                          k,
-                                                                          select_min,
-                                                                          nullptr,
-                                                                          nullptr,
-                                                                          grid_dim_x,
-                                                                          stream);
-  } else {
-    grid_dim_x = 1;
-  }
-
-  if (grid_dim_x == 1) {
-    distances_dev_ptr = distances;
-    indices_dev_ptr   = neighbors;
-  }
-
-  ivfflat_interleaved_scan<T, typename utils::config<T>::value_t, IdxT>(index,
-                                                                        queries,
-                                                                        coarse_indices_dev.data(),
-                                                                        n_queries,
-                                                                        index.metric(),
-                                                                        n_probes,
-                                                                        k,
-                                                                        select_min,
-                                                                        indices_dev_ptr,
-                                                                        distances_dev_ptr,
-                                                                        grid_dim_x,
-                                                                        stream);
-
-  RAFT_LOG_TRACE_VEC(distances_dev_ptr, 2 * k);
-  RAFT_LOG_TRACE_VEC(indices_dev_ptr, 2 * k);
-
-  // Merge topk values from different blocks
-  if (grid_dim_x > 1) {
-    matrix::detail::select_k<AccT, IdxT>(refined_distances_dev.data(),
-                                         refined_indices_dev.data(),
-                                         n_queries,
-                                         k * grid_dim_x,
-                                         k,
-                                         distances,
-                                         neighbors,
-                                         select_min,
-                                         stream,
-                                         search_mr);
-  }
-}
-
-/** See raft::neighbors::ivf_flat::search docs */
-template <typename T, typename IdxT>
-inline void search(raft::device_resources const& handle,
-                   const search_params& params,
-                   const index<T, IdxT>& index,
-                   const T* queries,
-                   uint32_t n_queries,
-                   uint32_t k,
-                   IdxT* neighbors,
-                   float* distances,
-                   rmm::mr::device_memory_resource* mr = nullptr)
-{
-  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
-    "ivf_flat::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, index.dim());
-
-  RAFT_EXPECTS(params.n_probes > 0,
-               "n_probes (number of clusters to probe in the search) must be positive.");
-  auto n_probes = std::min<uint32_t>(params.n_probes, index.n_lists());
-
-  auto pool_guard = raft::get_pool_memory_resource(mr, n_queries * n_probes * k * 16);
-  if (pool_guard) {
-    RAFT_LOG_DEBUG("ivf_flat::search: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
-  }
-
-  return search_impl<T, float, IdxT>(handle,
-                                     index,
-                                     queries,
-                                     n_queries,
-                                     k,
-                                     n_probes,
-                                     raft::distance::is_min_close(index.metric()),
-                                     neighbors,
-                                     distances,
-                                     mr);
-}
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "ivf_flat_search-inl.cuh"
+#endif
 
-}  // namespace raft::neighbors::ivf_flat::detail
+#ifdef RAFT_COMPILED
+#include "ivf_flat_search-ext.cuh"
+#endif
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
index 1bb7f97123..bec3b890eb 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
@@ -21,6 +21,7 @@
 #include <raft/neighbors/ivf_flat_types.hpp>
 #include <raft/neighbors/ivf_list.hpp>
 #include <raft/neighbors/ivf_list_types.hpp>
+#include <raft/util/pow2_utils.cuh>
 
 #include <fstream>
 
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh
new file mode 100644
index 0000000000..41e9fda701
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuda_fp16.h>                               // __half
+#include <raft/core/detail/macros.hpp>               // RAFT_WEAK_FUNCTION
+#include <raft/distance/distance_types.hpp>          // raft::distance::DistanceType
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>  // raft::neighbors::ivf_pq::detail::fp_8bit
+#include <raft/neighbors/ivf_pq_types.hpp>           // raft::neighbors::ivf_pq::codebook_gen
+#include <raft/util/raft_explicit.hpp>               // RAFT_EXPLICIT
+#include <rmm/cuda_stream_view.hpp>                  // rmm::cuda_stream_view
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::neighbors::ivf_pq::detail {
+
+// is_local_topk_feasible is not inline here, because we would have to define it
+// here as well. That would run the risk of the definitions here and in the
+// -inl.cuh header diverging.
+auto RAFT_WEAK_FUNCTION is_local_topk_feasible(uint32_t k, uint32_t n_probes, uint32_t n_queries)
+  -> bool;
+
+template <typename OutT,
+          typename LutT,
+          uint32_t PqBits,
+          int Capacity,
+          bool PrecompBaseDiff,
+          bool EnableSMemLut>
+__global__ void compute_similarity_kernel(uint32_t n_rows,
+                                          uint32_t dim,
+                                          uint32_t n_probes,
+                                          uint32_t pq_dim,
+                                          uint32_t n_queries,
+                                          distance::DistanceType metric,
+                                          codebook_gen codebook_kind,
+                                          uint32_t topk,
+                                          uint32_t max_samples,
+                                          const float* cluster_centers,
+                                          const float* pq_centers,
+                                          const uint8_t* const* pq_dataset,
+                                          const uint32_t* cluster_labels,
+                                          const uint32_t* _chunk_indices,
+                                          const float* queries,
+                                          const uint32_t* index_list,
+                                          float* query_kths,
+                                          LutT* lut_scores,
+                                          OutT* _out_scores,
+                                          uint32_t* _out_indices) RAFT_EXPLICIT;
+
+// The signature of the kernel defined by a minimal set of template parameters
+template <typename OutT, typename LutT>
+using compute_similarity_kernel_t =
+  decltype(&compute_similarity_kernel<OutT, LutT, 8, 0, true, true>);
+
+template <typename OutT, typename LutT>
+struct selected {
+  compute_similarity_kernel_t<OutT, LutT> kernel;
+  dim3 grid_dim;
+  dim3 block_dim;
+  size_t smem_size;
+  size_t device_lut_size;
+};
+
+template <typename OutT, typename LutT>
+void compute_similarity_run(selected<OutT, LutT> s,
+                            rmm::cuda_stream_view stream,
+                            uint32_t n_rows,
+                            uint32_t dim,
+                            uint32_t n_probes,
+                            uint32_t pq_dim,
+                            uint32_t n_queries,
+                            distance::DistanceType metric,
+                            codebook_gen codebook_kind,
+                            uint32_t topk,
+                            uint32_t max_samples,
+                            const float* cluster_centers,
+                            const float* pq_centers,
+                            const uint8_t* const* pq_dataset,
+                            const uint32_t* cluster_labels,
+                            const uint32_t* _chunk_indices,
+                            const float* queries,
+                            const uint32_t* index_list,
+                            float* query_kths,
+                            LutT* lut_scores,
+                            OutT* _out_scores,
+                            uint32_t* _out_indices) RAFT_EXPLICIT;
+
+/**
+ * Use heuristics to choose an optimal instance of the search kernel.
+ * It selects among a few kernel variants (with/out using shared mem for
+ * lookup tables / precomputed distances) and tries to choose the block size
+ * to maximize kernel occupancy.
+ *
+ * @param manage_local_topk
+ *    whether to use the fused calculate+select or just calculate the distances for each
+ *    query and probed cluster.
+ *
+ * @param locality_hint
+ *    beyond this limit, do not consider that increasing the number of active blocks per SM
+ *    would improve locality any further.
+ */
+template <typename OutT, typename LutT>
+auto compute_similarity_select(const cudaDeviceProp& dev_props,
+                               bool manage_local_topk,
+                               int locality_hint,
+                               double preferred_shmem_carveout,
+                               uint32_t pq_bits,
+                               uint32_t pq_dim,
+                               uint32_t precomp_data_count,
+                               uint32_t n_queries,
+                               uint32_t n_probes,
+                               uint32_t topk) -> selected<OutT, LutT> RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors::ivf_pq::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)         \
+  extern template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                           \
+    bool manage_local_topk,                                                                    \
+    int locality_hint,                                                                         \
+    double preferred_shmem_carveout,                                                           \
+    uint32_t pq_bits,                                                                          \
+    uint32_t pq_dim,                                                                           \
+    uint32_t precomp_data_count,                                                               \
+    uint32_t n_queries,                                                                        \
+    uint32_t n_probes,                                                                         \
+    uint32_t topk)                                                                             \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                                   \
+                                                                                               \
+  extern template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                                   \
+    rmm::cuda_stream_view stream,                                                              \
+    uint32_t n_rows,                                                                           \
+    uint32_t dim,                                                                              \
+    uint32_t n_probes,                                                                         \
+    uint32_t pq_dim,                                                                           \
+    uint32_t n_queries,                                                                        \
+    raft::distance::DistanceType metric,                                                       \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                       \
+    uint32_t topk,                                                                             \
+    uint32_t max_samples,                                                                      \
+    const float* cluster_centers,                                                              \
+    const float* pq_centers,                                                                   \
+    const uint8_t* const* pq_dataset,                                                          \
+    const uint32_t* cluster_labels,                                                            \
+    const uint32_t* _chunk_indices,                                                            \
+    const float* queries,                                                                      \
+    const uint32_t* index_list,                                                                \
+    float* query_kths,                                                                         \
+    LutT* lut_scores,                                                                          \
+    OutT* _out_scores,                                                                         \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(half, half);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, half);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, float);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh
new file mode 100644
index 0000000000..bc899c7ca7
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh
@@ -0,0 +1,845 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
+#include <raft/matrix/detail/select_warpsort.cuh>  // matrix::detail::select::warpsort::warp_sort_distributed
+#include <raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh>  // dummy_block_sort_t
+#include <raft/neighbors/ivf_pq_types.hpp>                    // codebook_gen
+#include <raft/util/cuda_rt_essentials.hpp>                   // RAFT_CUDA_TRY
+#include <raft/util/device_atomics.cuh>                       // raft::atomicMin
+#include <raft/util/pow2_utils.cuh>                           // raft::Pow2
+#include <raft/util/vectorized.cuh>                           // raft::TxN_t
+#include <rmm/cuda_stream_view.hpp>                           // rmm::cuda_stream_view
+
+namespace raft::neighbors::ivf_pq::detail {
+
+/**
+ * Maximum value of k for the fused calculate & select in ivfpq.
+ *
+ * If the runtime value of k is larger than this, the main search operation
+ * is split into two kernels (per batch: first calculate distances, then select top-k).
+ */
+static constexpr int kMaxCapacity = 128;  // also the first Capacity tried by the kernel config
+static_assert((kMaxCapacity >= 32) && !(kMaxCapacity & (kMaxCapacity - 1)),
+              "kMaxCapacity must be a power of two, not smaller than the WarpSize.");
+
+// The weak attribute is needed here, because this function may be compiled multiple times.
+auto RAFT_WEAK_FUNCTION is_local_topk_feasible(uint32_t k, uint32_t n_probes, uint32_t n_queries)
+  -> bool
+{
+  const bool k_fits      = !(k > kMaxCapacity);         // otherwise warp_sort is not possible
+  const bool many_probes = n_probes > 16;               // otherwise there are too few clusters
+  const bool much_work   = n_queries * n_probes > 256;  // otherwise the overall work is too small
+  return k_fits && many_probes && much_work;
+}
+
+template <int Capacity, typename T, typename IdxT>
+struct pq_block_sort {  // general case: the block-wide sorter built on warp_sort_distributed
+  using type = matrix::detail::select::warpsort::
+    block_sort<matrix::detail::select::warpsort::warp_sort_distributed, Capacity, true, T, IdxT>;
+};
+
+template <typename T, typename IdxT>
+struct pq_block_sort<0, T, IdxT> : dummy_block_sort_t<T, IdxT> {  // Capacity == 0: dummy sorter
+  using type = dummy_block_sort_t<T, IdxT>;
+};
+
+template <int Capacity, typename T, typename IdxT>
+using block_sort_t = typename pq_block_sort<Capacity, T, IdxT>::type;  // shortcut for `::type`
+
+/**
+ * Estimate a carveout value as expected by `cudaFuncAttributePreferredSharedMemoryCarveout`
+ * (which does not take into account `reservedSharedMemPerBlock`),
+ * given a desired shmem-L1 split and a per-block memory requirement in bytes.
+ *
+ * NB: As per the programming guide, the memory carveout setting is just a hint for the driver; it's
+ * free to choose any shmem-L1 configuration it deems appropriate. For example, if you set the
+ * carveout to zero, it will choose a non-zero config that allows running at least one active
+ * block per SM.
+ * NOTE(review): the numerator is unsigned (`size_t`), so it wraps around when `(m - 1) * r`
+ * exceeds the first term, e.g. for a very small `shmem_fraction` - confirm the callers' range.
+ *
+ * @param shmem_fraction
+ *   a fraction representing a desired split (shmem / (shmem + L1)) [0, 1].
+ * @param shmem_per_block
+ *   a shared memory usage per block (dynamic + static shared memory sizes), in bytes.
+ * @param dev_props device properties.
+ * @return a carveout value in percent [0, 100].
+ */
+constexpr inline auto estimate_carveout(double shmem_fraction,
+                                        size_t shmem_per_block,
+                                        const cudaDeviceProp& dev_props) -> int
+{
+  using shmem_unit = Pow2<128>;
+  size_t m         = shmem_unit::roundUp(shmem_per_block);  // per-block usage in 128-byte units
+  size_t r         = dev_props.reservedSharedMemPerBlock;
+  size_t s         = dev_props.sharedMemPerMultiprocessor;
+  return (size_t(100 * s * m * shmem_fraction) - (m - 1) * r) / (s * (m + r));
+}
+
+/* Manually unrolled loop over a chunk of pq_dataset that fits into one VecT. */
+template <typename OutT,
+          typename LutT,
+          typename VecT,
+          bool CheckBounds,
+          uint32_t PqBits,
+          uint32_t BitsLeft = 0,  // number of not yet consumed bits left in pq_code
+          uint32_t Ix       = 0>  // index of the next element of pq_codes to load
+__device__ __forceinline__ void ivfpq_compute_chunk(OutT& score /* NOLINT */,
+                                                    typename VecT::math_t& pq_code,
+                                                    const VecT& pq_codes,
+                                                    const LutT*& lut_head,
+                                                    const LutT*& lut_end)
+{
+  if constexpr (CheckBounds) {
+    if (lut_head >= lut_end) { return; }  // the end of the lookup table is reached
+  }
+  constexpr uint32_t kTotalBits = 8 * sizeof(typename VecT::math_t);
+  constexpr uint32_t kPqShift   = 1u << PqBits;  // lut_head step per processed code
+  constexpr uint32_t kPqMask    = kPqShift - 1u;
+  if constexpr (BitsLeft >= PqBits) {  // a whole code is still available in pq_code
+    uint8_t code = pq_code & kPqMask;
+    pq_code >>= PqBits;
+    score += OutT(lut_head[code]);
+    lut_head += kPqShift;
+    return ivfpq_compute_chunk<OutT, LutT, VecT, CheckBounds, PqBits, BitsLeft - PqBits, Ix>(
+      score, pq_code, pq_codes, lut_head, lut_end);
+  } else if constexpr (Ix < VecT::Ratio) {  // load the next element; the code may span both
+    uint8_t code                = pq_code;  // leftover low bits (BitsLeft of them)
+    pq_code                     = pq_codes.val.data[Ix];
+    constexpr uint32_t kRemBits = PqBits - BitsLeft;
+    constexpr uint32_t kRemMask = (1u << kRemBits) - 1u;
+    code |= (pq_code & kRemMask) << BitsLeft;  // complete the code with bits of the new element
+    pq_code >>= kRemBits;
+    score += OutT(lut_head[code]);
+    lut_head += kPqShift;
+    return ivfpq_compute_chunk<OutT,
+                               LutT,
+                               VecT,
+                               CheckBounds,
+                               PqBits,
+                               kTotalBits - kRemBits,
+                               Ix + 1>(score, pq_code, pq_codes, lut_head, lut_end);
+  }  // neither branch taken: all elements of pq_codes are consumed, the recursion ends
+}
+
+/* Compute the similarity for one vector in the pq_dataset */
+template <typename OutT, typename LutT, typename VecT, uint32_t PqBits>
+__device__ auto ivfpq_compute_score(uint32_t pq_dim,
+                                    const typename VecT::io_t* pq_head,
+                                    const LutT* lut_scores,
+                                    OutT early_stop_limit) -> OutT
+{
+  constexpr uint32_t kChunkSize = sizeof(VecT) * 8u / PqBits;  // whole PQ codes per VecT
+  auto lut_head                 = lut_scores;
+  auto lut_end                  = lut_scores + (pq_dim << PqBits);
+  VecT pq_codes;
+  OutT score{0};
+  for (; pq_dim >= kChunkSize; pq_dim -= kChunkSize) {  // full chunks: no bounds checks needed
+    *pq_codes.vectorized_data() = *pq_head;
+    pq_head += kIndexGroupSize;  // step to the next chunk of this record
+    typename VecT::math_t pq_code = 0;
+    ivfpq_compute_chunk<OutT, LutT, VecT, false, PqBits>(
+      score, pq_code, pq_codes, lut_head, lut_end);
+    // Early stop when it makes sense (otherwise early_stop_limit is kDummy/infinity).
+    if (score >= early_stop_limit) { return score; }
+  }
+  if (pq_dim > 0) {  // the last, partially filled chunk: stop at lut_end
+    *pq_codes.vectorized_data()   = *pq_head;
+    typename VecT::math_t pq_code = 0;
+    ivfpq_compute_chunk<OutT, LutT, VecT, true, PqBits>(
+      score, pq_code, pq_codes, lut_head, lut_end);
+  }
+  return score;
+}
+
+/**
+ * The main kernel that computes similarity scores across multiple queries and probes.
+ * When `Capacity > 0`, it also selects top K candidates for each query and probe
+ * (which need to be merged across probes afterwards).
+ *
+ * Each block processes a (query, probe) pair: it calculates the distance between the single query
+ * vector and all the dataset vectors in the cluster that we are probing.
+ *
+ * @tparam OutT
+ *   The output type - distances.
+ * @tparam LutT
+ *   The lookup table element type (lut_scores).
+ * @tparam PqBits
+ *   The bit length of an encoded vector element after compression by PQ
+ *   (NB: pq_book_size = 1 << PqBits).
+ * @tparam Capacity
+ *   Power-of-two; the maximum possible `k` in top-k. Value zero disables fused top-k search.
+ * @tparam PrecompBaseDiff
+ *   Defines whether we should precompute part of the distance and keep it in shared memory
+ *   before the main part (score calculation) to increase memory usage efficiency in the latter.
+ *   For L2, this is the distance between the query and the cluster center.
+ * @tparam EnableSMemLut
+ *   Defines whether to use the shared memory for the lookup table (`lut_scores`).
+ *   Setting this to `false` allows to reduce the shared memory usage (and maximum data dim)
+ *   at the cost of reducing global memory reading throughput.
+ *
+ * @param n_rows the number of records in the dataset
+ * @param dim the dimensionality of the data (NB: after rotation transform, i.e. `index.rot_dim()`).
+ * @param n_probes the number of clusters to search for each query
+ * @param pq_dim
+ *   The dimensionality of an encoded vector after compression by PQ.
+ * @param n_queries the number of queries.
+ * @param metric the distance type.
+ * @param codebook_kind Defines the way PQ codebooks have been trained.
+ * @param topk the `k` in the select top-k.
+ * @param max_samples the size of the output for a single query.
+ * @param cluster_centers
+ *   The device pointer to the cluster centers in the original space (NB: after rotation)
+ *   [n_clusters, dim].
+ * @param pq_centers
+ *   The device pointer to the cluster centers in the PQ space
+ *   [pq_dim, pq_book_size, pq_len] or [n_clusters, pq_book_size, pq_len].
+ * @param pq_dataset
+ *   The device pointer to the PQ index (data) [n_rows, ...].
+ * @param cluster_labels
+ *   The device pointer to the labels (clusters) for each query and probe [n_queries, n_probes].
+ * @param _chunk_indices
+ *   The device pointer to the data offsets for each query and probe [n_queries, n_probes].
+ * @param queries The device pointer to the queries (NB: after rotation) [n_queries, dim].
+ * @param index_list
+ *   An optional device pointer to the enforced order of search [n_queries, n_probes].
+ *   One can pass reordered indices here to try to improve data reading locality.
+ * @param query_kths Device pointer to a per-query score bound [n_queries]: read before scoring,
+ *   lowered via atomicMin afterwards; accessed only when `Capacity > 0`.
+ * @param lut_scores The device pointer for storing the lookup table globally
+ *   [gridDim.x, pq_dim << PqBits]. Ignored when `EnableSMemLut == true`.
+ * @param _out_scores
+ *   The device pointer to the output scores
+ *   [n_queries, max_samples] or [n_queries, n_probes, topk].
+ * @param _out_indices
+ *   The device pointer to the output indices [n_queries, n_probes, topk].
+ *   These are the indices of the records as they appear in the database view formed by the probed
+ *   clusters / defined by the `_chunk_indices`.
+ *   The indices can have values within the range [0, max_samples).
+ *   Ignored when `Capacity == 0`.
+ */
+template <typename OutT,
+          typename LutT,
+          uint32_t PqBits,
+          int Capacity,
+          bool PrecompBaseDiff,
+          bool EnableSMemLut>
+__global__ void compute_similarity_kernel(uint32_t n_rows,  // not referenced in the kernel body
+                                          uint32_t dim,
+                                          uint32_t n_probes,
+                                          uint32_t pq_dim,
+                                          uint32_t n_queries,
+                                          distance::DistanceType metric,
+                                          codebook_gen codebook_kind,
+                                          uint32_t topk,
+                                          uint32_t max_samples,
+                                          const float* cluster_centers,
+                                          const float* pq_centers,
+                                          const uint8_t* const* pq_dataset,
+                                          const uint32_t* cluster_labels,
+                                          const uint32_t* _chunk_indices,
+                                          const float* queries,
+                                          const uint32_t* index_list,
+                                          float* query_kths,
+                                          LutT* lut_scores,
+                                          OutT* _out_scores,
+                                          uint32_t* _out_indices)
+{
+  /* Shared memory:
+
+    * lut_scores: lookup table (LUT) of size = `pq_dim << PqBits`  (when EnableSMemLut)
+    * base_diff: size = dim floats (equal to `pq_dim * pq_len`), or dim*2 for InnerProduct
+    * topk::block_sort: some amount of shared memory, but overlaps with the rest:
+        block_sort only needs shared memory for `.done()` operation, which can come very last.
+  */
+  extern __shared__ __align__(256) uint8_t smem_buf[];  // NOLINT
+  constexpr bool kManageLocalTopK = Capacity > 0;
+
+  constexpr uint32_t PqShift = 1u << PqBits;  // NOLINT
+  constexpr uint32_t PqMask  = PqShift - 1u;  // NOLINT
+
+  const uint32_t pq_len   = dim / pq_dim;
+  const uint32_t lut_size = pq_dim * PqShift;
+
+  if constexpr (EnableSMemLut) {
+    lut_scores = reinterpret_cast<LutT*>(smem_buf);
+  } else {
+    lut_scores += lut_size * blockIdx.x;  // each block uses its own slice of the global buffer
+  }
+
+  float* base_diff = nullptr;
+  if constexpr (PrecompBaseDiff) {
+    if constexpr (EnableSMemLut) {
+      base_diff = reinterpret_cast<float*>(lut_scores + lut_size);
+    } else {
+      base_diff = reinterpret_cast<float*>(smem_buf);
+    }
+  }
+
+  for (int ib = blockIdx.x; ib < n_queries * n_probes; ib += gridDim.x) {
+    if (ib >= gridDim.x) {
+      // sync shared memory accesses on the second and further iterations
+      __syncthreads();
+    }
+    uint32_t query_ix;
+    uint32_t probe_ix;
+    if (index_list == nullptr) {
+      query_ix = ib % n_queries;  // default order: all queries of probe 0, then of probe 1, ...
+      probe_ix = ib / n_queries;
+    } else {
+      auto ordered_ix = index_list[ib];
+      query_ix        = ordered_ix / n_probes;
+      probe_ix        = ordered_ix % n_probes;
+    }
+
+    const uint32_t* chunk_indices = _chunk_indices + (n_probes * query_ix);
+    const float* query            = queries + (dim * query_ix);
+    OutT* out_scores;
+    uint32_t* out_indices = nullptr;
+    if constexpr (kManageLocalTopK) {
+      // Store topk calculated distances to out_scores (and its indices to out_indices)
+      out_scores  = _out_scores + topk * (probe_ix + (n_probes * query_ix));
+      out_indices = _out_indices + topk * (probe_ix + (n_probes * query_ix));
+    } else {
+      // Store all calculated distances to out_scores
+      out_scores = _out_scores + max_samples * query_ix;
+    }
+    uint32_t label              = cluster_labels[n_probes * query_ix + probe_ix];
+    const float* cluster_center = cluster_centers + (dim * label);
+    const float* pq_center;
+    if (codebook_kind == codebook_gen::PER_SUBSPACE) {
+      pq_center = pq_centers;
+    } else {
+      pq_center = pq_centers + (pq_len << PqBits) * label;
+    }
+
+    if constexpr (PrecompBaseDiff) {
+      // Reduce number of memory reads later by pre-computing parts of the score
+      switch (metric) {
+        case distance::DistanceType::L2SqrtExpanded:
+        case distance::DistanceType::L2Expanded: {
+          for (uint32_t i = threadIdx.x; i < dim; i += blockDim.x) {
+            base_diff[i] = query[i] - cluster_center[i];
+          }
+        } break;
+        case distance::DistanceType::InnerProduct: {
+          float2 pvals;
+          for (uint32_t i = threadIdx.x; i < dim; i += blockDim.x) {
+            pvals.x                                 = query[i];
+            pvals.y                                 = cluster_center[i] * pvals.x;
+            reinterpret_cast<float2*>(base_diff)[i] = pvals;
+          }
+        } break;
+        default: __builtin_unreachable();
+      }
+      __syncthreads();
+    }
+
+    {
+      // Create a lookup table
+      // For each subspace, the lookup table stores the distance between the actual query vector
+      // (projected into the subspace) and all possible pq vectors in that subspace.
+      for (uint32_t i = threadIdx.x; i < lut_size; i += blockDim.x) {
+        const uint32_t i_pq  = i >> PqBits;
+        uint32_t j           = i_pq * pq_len;
+        const uint32_t j_end = pq_len + j;
+        auto cur_pq_center   = pq_center + (i & PqMask) +
+                             (codebook_kind == codebook_gen::PER_SUBSPACE ? j * PqShift : 0u);
+        float score = 0.0;
+        do {
+          float pq_c = *cur_pq_center;
+          cur_pq_center += PqShift;
+          switch (metric) {
+            case distance::DistanceType::L2SqrtExpanded:
+            case distance::DistanceType::L2Expanded: {
+              float diff;
+              if constexpr (PrecompBaseDiff) {
+                diff = base_diff[j];
+              } else {
+                diff = query[j] - cluster_center[j];
+              }
+              diff -= pq_c;
+              score += diff * diff;
+            } break;
+            case distance::DistanceType::InnerProduct: {
+              // NB: we negate the scores as we hardcoded select-topk to always compute the minimum
+              float q;
+              if constexpr (PrecompBaseDiff) {
+                float2 pvals = reinterpret_cast<float2*>(base_diff)[j];
+                q            = pvals.x;
+                score -= pvals.y;
+              } else {
+                q = query[j];
+                score -= q * cluster_center[j];
+              }
+              score -= q * pq_c;
+            } break;
+            default: __builtin_unreachable();
+          }
+        } while (++j < j_end);
+        lut_scores[i] = LutT(score);
+      }
+    }
+
+    // Define helper types for efficient access to the pq_dataset, which is stored in an interleaved
+    // format. The chunks of PQ data are stored in kIndexGroupVecLen-bytes-long chunks, interleaved
+    // in groups of kIndexGroupSize elems (which is normally equal to the warp size) for the fastest
+    // possible access by thread warps.
+    //
+    // Consider one record in the pq_dataset is `pq_dim * pq_bits`-bit-long.
+    // Assuming `kIndexGroupVecLen = 16`, one chunk of data read by a thread at once is 128-bits.
+    // Then, such a chunk contains `chunk_size = 128 / pq_bits` record elements, and the record
+    // consists of `ceildiv(pq_dim, chunk_size)` chunks. The chunks are interleaved in groups of 32,
+    // so that the warp can achieve the best coalesced read throughput.
+    using group_align  = Pow2<kIndexGroupSize>;
+    using vec_align    = Pow2<kIndexGroupVecLen>;
+    using local_topk_t = block_sort_t<Capacity, OutT, uint32_t>;
+    using op_t         = uint32_t;
+    using vec_t        = TxN_t<op_t, kIndexGroupVecLen / sizeof(op_t)>;
+
+    uint32_t sample_offset = 0;  // chunk_indices[p] is used as the end offset of probe p
+    if (probe_ix > 0) { sample_offset = chunk_indices[probe_ix - 1]; }
+    uint32_t n_samples            = chunk_indices[probe_ix] - sample_offset;
+    uint32_t n_samples_aligned    = group_align::roundUp(n_samples);
+    constexpr uint32_t kChunkSize = (kIndexGroupVecLen * 8u) / PqBits;
+    uint32_t pq_line_width        = div_rounding_up_unsafe(pq_dim, kChunkSize) * kIndexGroupVecLen;
+    auto pq_thread_data = pq_dataset[label] + group_align::roundDown(threadIdx.x) * pq_line_width +
+                          group_align::mod(threadIdx.x) * vec_align::Value;
+    pq_line_width *= blockDim.x;  // from here on: the per-thread stride between loop iterations
+
+    constexpr OutT kDummy = upper_bound<OutT>();
+    OutT query_kth        = kDummy;
+    if constexpr (kManageLocalTopK) { query_kth = OutT(query_kths[query_ix]); }
+    local_topk_t block_topk(topk, nullptr, query_kth);
+    OutT early_stop_limit = kDummy;
+    switch (metric) {
+      // If the metric is non-negative, we can use the query_kth approximation as an early stop
+      // threshold to skip some iterations when computing the score. Add such metrics here.
+      case distance::DistanceType::L2SqrtExpanded:
+      case distance::DistanceType::L2Expanded: {
+        early_stop_limit = query_kth;
+      } break;
+      default: break;
+    }
+
+    // Ensure lut_scores is written by all threads before using it in ivfpq-compute-score
+    __threadfence_block();
+    __syncthreads();
+
+    // Compute a distance for each sample
+    for (uint32_t i = threadIdx.x; i < n_samples_aligned;
+         i += blockDim.x, pq_thread_data += pq_line_width) {
+      OutT score = kDummy;  // threads with i >= n_samples keep the dummy score
+      bool valid = i < n_samples;
+      if (valid) {
+        score = ivfpq_compute_score<OutT, LutT, vec_t, PqBits>(
+          pq_dim,
+          reinterpret_cast<const vec_t::io_t*>(pq_thread_data),
+          lut_scores,
+          early_stop_limit);
+      }
+      if constexpr (kManageLocalTopK) {
+        block_topk.add(score, sample_offset + i);
+      } else {
+        if (valid) { out_scores[sample_offset + i] = score; }
+      }
+    }
+    if constexpr (kManageLocalTopK) {
+      // sync threads before the topk merging operation, because we reuse smem_buf
+      __syncthreads();
+      block_topk.done(smem_buf);
+      block_topk.store(out_scores, out_indices);
+      if (threadIdx.x == 0) { atomicMin(query_kths + query_ix, float(out_scores[topk - 1])); }
+    } else {
+      // fill in the rest of the out_scores with dummy values
+      if (probe_ix + 1 == n_probes) {
+        for (uint32_t i = threadIdx.x + sample_offset + n_samples; i < max_samples;
+             i += blockDim.x) {
+          out_scores[i] = kDummy;
+        }
+      }
+    }
+  }
+}
+
+// Kernel function-pointer type; it depends only on OutT/LutT (the other args here are arbitrary).
+template <typename OutT, typename LutT>
+using compute_similarity_kernel_t =
+  decltype(&compute_similarity_kernel<OutT, LutT, 8, 0, true, true>);
+
+// The config struct lifts the runtime parameters (pq_bits, k_max) to the template parameters
+template <typename OutT, typename LutT, bool PrecompBaseDiff, bool EnableSMemLut>
+struct compute_similarity_kernel_config {
+ public:
+  static auto get(uint32_t pq_bits, uint32_t k_max) -> compute_similarity_kernel_t<OutT, LutT>
+  {
+    return kernel_choose_bits(pq_bits, k_max);
+  }
+
+ private:
+  static auto kernel_choose_bits(uint32_t pq_bits, uint32_t k_max)
+    -> compute_similarity_kernel_t<OutT, LutT>
+  {
+    switch (pq_bits) {  // runtime pq_bits -> PqBits template parameter
+      case 4: return kernel_try_capacity<4, kMaxCapacity>(k_max);
+      case 5: return kernel_try_capacity<5, kMaxCapacity>(k_max);
+      case 6: return kernel_try_capacity<6, kMaxCapacity>(k_max);
+      case 7: return kernel_try_capacity<7, kMaxCapacity>(k_max);
+      case 8: return kernel_try_capacity<8, kMaxCapacity>(k_max);
+      default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
+    }
+  }
+
+  template <uint32_t PqBits, int Capacity>
+  static auto kernel_try_capacity(uint32_t k_max) -> compute_similarity_kernel_t<OutT, LutT>
+  {
+    if constexpr (Capacity > 0) {  // k_max == 0 or too large: use the kernel with Capacity = 0
+      if (k_max == 0 || k_max > Capacity) { return kernel_try_capacity<PqBits, 0>(k_max); }
+    }
+    if constexpr (Capacity > 1) {  // halve the capacity as long as k_max still fits
+      if (k_max * 2 <= Capacity) { return kernel_try_capacity<PqBits, (Capacity / 2)>(k_max); }
+    }
+    return compute_similarity_kernel<OutT, LutT, PqBits, Capacity, PrecompBaseDiff, EnableSMemLut>;
+  }
+};
+
+// A standalone accessor function that forwards to `compute_similarity_kernel_config::get`.
+// It was introduced to make sure template instantiation works correctly; it is still
+// referenced by `compute_similarity_select` when it builds the list of candidate kernels.
+template <typename OutT, typename LutT, bool PrecompBaseDiff, bool EnableSMemLut>
+auto get_compute_similarity_kernel(uint32_t pq_bits, uint32_t k_max)
+  -> compute_similarity_kernel_t<OutT, LutT>
+{
+  using config_t = compute_similarity_kernel_config<OutT, LutT, PrecompBaseDiff, EnableSMemLut>;
+  return config_t::get(pq_bits, k_max);
+}
+
+/** Estimate the occupancy for the given kernel on the given device. */
+template <typename OutT, typename LutT>
+struct occupancy_t {
+  using shmem_unit = Pow2<128>;
+
+  int blocks_per_sm = 0;    // active blocks per SM, as reported by the CUDA occupancy API
+  double occupancy  = 0.0;  // blocks_per_sm * n_threads relative to maxThreadsPerMultiProcessor
+  double shmem_use  = 1.0;  // rounded-up shmem of all blocks relative to the per-SM total
+
+  inline occupancy_t() = default;
+  inline occupancy_t(size_t smem,  // dynamic shared memory per block, in bytes
+                     uint32_t n_threads,
+                     compute_similarity_kernel_t<OutT, LutT> kernel,
+                     const cudaDeviceProp& dev_props)
+  {
+    RAFT_CUDA_TRY(
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blocks_per_sm, kernel, n_threads, smem));
+    occupancy = double(blocks_per_sm * n_threads) / double(dev_props.maxThreadsPerMultiProcessor);
+    shmem_use = double(shmem_unit::roundUp(smem) * blocks_per_sm) /
+                double(dev_props.sharedMemPerMultiprocessor);
+  }
+};
+
+template <typename OutT, typename LutT>
+struct selected {  // a kernel instance together with its launch configuration
+  compute_similarity_kernel_t<OutT, LutT> kernel;
+  dim3 grid_dim;
+  dim3 block_dim;
+  size_t smem_size;        // dynamic shared memory size passed at launch
+  size_t device_lut_size;  // NOTE(review): presumably the global-memory LUT size - confirm
+};
+
+template <typename OutT, typename LutT>
+void compute_similarity_run(selected<OutT, LutT> s,  // the kernel and its launch configuration
+                            rmm::cuda_stream_view stream,
+                            uint32_t n_rows,
+                            uint32_t dim,
+                            uint32_t n_probes,
+                            uint32_t pq_dim,
+                            uint32_t n_queries,
+                            distance::DistanceType metric,
+                            codebook_gen codebook_kind,
+                            uint32_t topk,
+                            uint32_t max_samples,
+                            const float* cluster_centers,
+                            const float* pq_centers,
+                            const uint8_t* const* pq_dataset,
+                            const uint32_t* cluster_labels,
+                            const uint32_t* _chunk_indices,
+                            const float* queries,
+                            const uint32_t* index_list,
+                            float* query_kths,
+                            LutT* lut_scores,
+                            OutT* _out_scores,
+                            uint32_t* _out_indices)
+{
+  s.kernel<<<s.grid_dim, s.block_dim, s.smem_size, stream>>>(n_rows,  // arguments passed as-is
+                                                             dim,
+                                                             n_probes,
+                                                             pq_dim,
+                                                             n_queries,
+                                                             metric,
+                                                             codebook_kind,
+                                                             topk,
+                                                             max_samples,
+                                                             cluster_centers,
+                                                             pq_centers,
+                                                             pq_dataset,
+                                                             cluster_labels,
+                                                             _chunk_indices,
+                                                             queries,
+                                                             index_list,
+                                                             query_kths,
+                                                             lut_scores,
+                                                             _out_scores,
+                                                             _out_indices);
+  RAFT_CHECK_CUDA(stream);  // NOTE(review): presumably reports launch errors - confirm the macro
+}
+
+/**
+ * Use heuristics to choose an optimal instance of the search kernel.
+ * It selects among a few kernel variants (with/out using shared mem for
+ * lookup tables / precomputed distances) and tries to choose the block size
+ * to maximize kernel occupancy.
+ *
+ * @param manage_local_topk
+ *    whether to use the fused calculate+select or just calculate the distances for each
+ *    query and probed cluster.
+ *
+ * @param locality_hint
+ *    beyond this limit, do not assume that increasing the number of active blocks per SM
+ *    would improve locality anymore.
+ */
+template <typename OutT, typename LutT>
+auto compute_similarity_select(const cudaDeviceProp& dev_props,
+                               bool manage_local_topk,
+                               int locality_hint,
+                               double preferred_shmem_carveout,
+                               uint32_t pq_bits,
+                               uint32_t pq_dim,
+                               uint32_t precomp_data_count,
+                               uint32_t n_queries,
+                               uint32_t n_probes,
+                               uint32_t topk) -> selected<OutT, LutT>
+{
+  // Shared memory for storing the lookup table
+  size_t lut_mem = sizeof(LutT) * (pq_dim << pq_bits);
+  // Shared memory for storing pre-computed pieces to speedup the lookup table construction
+  // (e.g. the distance between a cluster center and the query for L2).
+  size_t bdf_mem = sizeof(float) * precomp_data_count;
+  // Shared memory for the fused top-k component; it may overlap with the other uses of shared
+  // memory and depends on the number of threads.
+  struct ltk_mem_t {
+    uint32_t subwarp_size;
+    uint32_t topk;
+    bool manage_local_topk;
+    ltk_mem_t(bool manage_local_topk, uint32_t topk)
+      : manage_local_topk(manage_local_topk), topk(topk)
+    {
+      subwarp_size = WarpSize;
+      while (topk * 2 <= subwarp_size) {
+        subwarp_size /= 2;
+      }
+    }
+
+    [[nodiscard]] auto operator()(uint32_t n_threads) const -> size_t
+    {
+      return manage_local_topk
+               ? matrix::detail::select::warpsort::template calc_smem_size_for_block_wide<OutT,
+                                                                                          uint32_t>(
+                   n_threads / subwarp_size, topk)
+               : 0;
+    }
+  } ltk_mem{manage_local_topk, topk};
+
+  // Total amount of work; should be enough to occupy the GPU.
+  uint32_t n_blocks = n_queries * n_probes;
+
+  // The minimum block size we may want:
+  //   1. It's a power-of-two for efficient L1 caching of pq_centers values
+  //      (multiples of `1 << pq_bits`).
+  //   2. It should be large enough to fully utilize an SM.
+  uint32_t n_threads_min = WarpSize;
+  while (dev_props.maxBlocksPerMultiProcessor * int(n_threads_min) <
+         dev_props.maxThreadsPerMultiProcessor) {
+    n_threads_min *= 2;
+  }
+  // Further increase the minimum block size to make sure full device occupancy
+  // (NB: this may lead to `n_threads_min` being larger than the kernel's maximum)
+  while (int(n_blocks * n_threads_min) <
+           dev_props.multiProcessorCount * dev_props.maxThreadsPerMultiProcessor &&
+         int(n_threads_min) < dev_props.maxThreadsPerBlock) {
+    n_threads_min *= 2;
+  }
+  // Even further, increase it to allow fewer blocks per SM if there are not enough queries.
+  // With this, we reduce the chance of different clusters being processed by two blocks
+  // on the same SM and thus improve the data locality for L1 caching.
+  while (int(n_queries * n_threads_min) < dev_props.maxThreadsPerMultiProcessor &&
+         int(n_threads_min) < dev_props.maxThreadsPerBlock) {
+    n_threads_min *= 2;
+  }
+
+  // Granularity of changing the number of threads when computing the maximum block size.
+  // It's good to have it multiple of the PQ book width.
+  uint32_t n_threads_gty = round_up_safe<uint32_t>(1u << pq_bits, WarpSize);
+
+  /*
+   Shared memory / L1 cache balance is the main limiter of this kernel.
+   The more blocks per SM we launch, the more shared memory we need. Besides that, we have
+   three versions of the kernel varying in performance and shmem usage.
+
+   We try the most demanding and the fastest kernel first, trying to maximize occupancy with
+   the minimum number of blocks (just one, really). Then, we tweak the `n_threads` to further
+   optimize occupancy and data locality for the L1 cache.
+   */
+  auto conf_fast        = get_compute_similarity_kernel<OutT, LutT, true, true>;
+  auto conf_no_basediff = get_compute_similarity_kernel<OutT, LutT, false, true>;
+  auto conf_no_smem_lut = get_compute_similarity_kernel<OutT, LutT, true, false>;
+  auto topk_or_zero     = manage_local_topk ? topk : 0u;
+  std::array candidates{std::make_tuple(conf_fast(pq_bits, topk_or_zero), lut_mem + bdf_mem, true),
+                        std::make_tuple(conf_no_basediff(pq_bits, topk_or_zero), lut_mem, true),
+                        std::make_tuple(conf_no_smem_lut(pq_bits, topk_or_zero), bdf_mem, false)};
+
+  // we may allow slightly lower than 100% occupancy;
+  constexpr double kTargetOccupancy = 0.75;
+  // This struct is used to select the better candidate
+  occupancy_t<OutT, LutT> selected_perf{};
+  selected<OutT, LutT> selected_config;
+  for (auto [kernel, smem_size_const, lut_is_in_shmem] : candidates) {
+    if (smem_size_const > dev_props.sharedMemPerBlockOptin) {
+      // Even a single block cannot fit into an SM due to shmem requirements. Skip the candidate.
+      continue;
+    }
+
+    // First, we set the carveout hint to the preferred value. The driver will increase this if
+    // needed to run at least one block per SM. At the same time, if more blocks fit into one SM,
+    // this carveout value will limit the calculated occupancy. When we're done selecting the best
+    // launch configuration, we will tighten the carveout once more, based on the final memory
+    // usage and occupancy.
+    const int max_carveout =
+      estimate_carveout(preferred_shmem_carveout, smem_size_const, dev_props);
+    RAFT_CUDA_TRY(
+      cudaFuncSetAttribute(kernel, cudaFuncAttributePreferredSharedMemoryCarveout, max_carveout));
+
+    // Get the theoretical maximum possible number of threads per block
+    cudaFuncAttributes kernel_attrs;
+    RAFT_CUDA_TRY(cudaFuncGetAttributes(&kernel_attrs, kernel));
+    uint32_t n_threads = round_down_safe<uint32_t>(kernel_attrs.maxThreadsPerBlock, n_threads_gty);
+
+    // Actual required shmem depends on the number of threads
+    size_t smem_size = max(smem_size_const, ltk_mem(n_threads));
+
+    // Make sure the kernel can get enough shmem.
+    cudaError_t cuda_status =
+      cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
+    if (cuda_status != cudaSuccess) {
+      RAFT_EXPECTS(
+        cuda_status == cudaGetLastError(),
+        "Tried to reset the expected cuda error code, but it didn't match the expectation");
+      // Failed to request enough shmem for the kernel. Skip the candidate.
+      continue;
+    }
+
+    occupancy_t<OutT, LutT> cur(smem_size, n_threads, kernel, dev_props);
+    if (cur.blocks_per_sm <= 0) {
+      // For some reason, we still cannot make this kernel run. Skip the candidate.
+      continue;
+    }
+
+    {
+      // Try to reduce the number of threads to increase occupancy and data locality
+      auto n_threads_tmp = n_threads_min;
+      while (n_threads_tmp * 2 < n_threads) {
+        n_threads_tmp *= 2;
+      }
+      if (n_threads_tmp < n_threads) {
+        while (n_threads_tmp >= n_threads_min) {
+          auto smem_size_tmp = max(smem_size_const, ltk_mem(n_threads_tmp));
+          occupancy_t<OutT, LutT> tmp(smem_size_tmp, n_threads_tmp, kernel, dev_props);
+          bool select_it = false;
+          if (lut_is_in_shmem && locality_hint >= tmp.blocks_per_sm) {
+            // Normally, the smaller the block the better for L1 cache hit rate.
+            // Hence, the occupancy should be "just good enough"
+            select_it = tmp.occupancy >= min(kTargetOccupancy, cur.occupancy);
+          } else if (lut_is_in_shmem) {
+            // If we don't have enough repeating probes (locality_hint < tmp.blocks_per_sm),
+            // the locality is not going to improve with increasing the number of blocks per SM.
+            // Hence, the only metric here is the occupancy.
+            bool improves_occupancy = tmp.occupancy > cur.occupancy;
+            // Otherwise, the performance still improves with a smaller block size,
+            // given there is enough work to do
+            bool improves_parallelism =
+              tmp.occupancy == cur.occupancy &&
+              7u * tmp.blocks_per_sm * dev_props.multiProcessorCount <= n_blocks;
+            select_it = improves_occupancy || improves_parallelism;
+          } else {
+            // If we don't use shared memory for the lookup table, increasing the number of blocks
+            // is very taxing on the global memory usage.
+            // In this case, the occupancy must increase a lot to make it worth the cost.
+            select_it = tmp.occupancy >= min(1.0, cur.occupancy / kTargetOccupancy);
+          }
+          if (select_it) {
+            n_threads = n_threads_tmp;
+            smem_size = smem_size_tmp;
+            cur       = tmp;
+          }
+          n_threads_tmp /= 2;
+        }
+      }
+    }
+
+    {
+      if (selected_perf.occupancy <= 0.0                 // no candidate yet
+          || (selected_perf.occupancy < cur.occupancy * kTargetOccupancy &&
+              selected_perf.shmem_use >= cur.shmem_use)  // much improved occupancy
+      ) {
+        selected_perf = cur;
+        if (lut_is_in_shmem) {
+          selected_config = {
+            kernel, dim3(n_blocks, 1, 1), dim3(n_threads, 1, 1), smem_size, size_t(0)};
+        } else {
+          // When the global memory is used for the lookup table, we need to minimize the grid
+          // size; otherwise, the kernel may quickly run out of memory.
+          auto n_blocks_min =
+            std::min<uint32_t>(n_blocks, cur.blocks_per_sm * dev_props.multiProcessorCount);
+          selected_config = {kernel,
+                             dim3(n_blocks_min, 1, 1),
+                             dim3(n_threads, 1, 1),
+                             smem_size,
+                             size_t(n_blocks_min) * size_t(pq_dim << pq_bits)};
+        }
+        // Actual shmem/L1 split wildly rounds up the specified preferred carveout, so we set here
+        // a rather conservative bar; most likely, the kernel gets more shared memory than this,
+        // and the occupancy doesn't get hurt.
+        auto carveout = std::min<int>(max_carveout, std::ceil(100.0 * cur.shmem_use));
+        RAFT_CUDA_TRY(
+          cudaFuncSetAttribute(kernel, cudaFuncAttributePreferredSharedMemoryCarveout, carveout));
+        if (cur.occupancy >= kTargetOccupancy) { break; }
+      } else if (selected_perf.occupancy > 0.0) {
+        // If we found a reasonable candidate on a previous iteration, and this one is not better,
+        // then don't try any more candidates because they are much slower anyway.
+        break;
+      }
+    }
+  }
+
+  RAFT_EXPECTS(selected_perf.occupancy > 0.0,
+               "Couldn't determine a working kernel launch configuration.");
+
+  return selected_config;
+}
+
+}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh
similarity index 76%
rename from cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu
rename to cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh
index 28306d0c21..d987c0d4ed 100644
--- a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh
@@ -14,7 +14,12 @@
  * limitations under the License.
  */
 
-#include <raft/distance/detail/kernels/kernel_matrices.cuh>
-#include <raft/distance/specializations.cuh>
+#pragma once
 
-template class raft::distance::kernels::detail::PolynomialKernel<double, int>;
\ No newline at end of file
+#if !defined(RAFT_EXPLICIT_INSTANTIATE_ONLY)
+#include "ivf_pq_compute_similarity-inl.cuh"
+#endif
+
+#ifdef RAFT_COMPILED
+#include "ivf_pq_compute_similarity-ext.cuh"
+#endif
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh
new file mode 100644
index 0000000000..a00b6a50ff
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/matrix/detail/select_warpsort.cuh>  // matrix::detail::select::warpsort::warp_sort_distributed
+
+/*
+ * This header file is a bit of an ugly duckling. The type dummy_block_sort_t is
+ * needed by both ivf_pq_search.cuh and ivf_pq_compute_similarity.cuh, so it has
+ * been moved to its own header file, which is overkill. Perhaps there is a
+ * nicer solution.
+ */
+
+namespace raft::neighbors::ivf_pq::detail {
+
+// Stateless type that only provides the `queue_t` alias; its constructor accepts `k` plus any
+// extra arguments and ignores all of them.
+template <typename T, typename IdxT>
+struct dummy_block_sort_t {
+  using queue_t = matrix::detail::select::warpsort::warp_sort_distributed<WarpSize, true, T, IdxT>;
+  template <typename... Args>
+  __device__ dummy_block_sort_t(int k, Args...){};
+};
+
+}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh
new file mode 100644
index 0000000000..87f9bfb622
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
+#include <raft/neighbors/ivf_pq_types.hpp>
+
+#include <raft/core/cudart_utils.hpp>
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/core/logger.hpp>
+#include <raft/core/nvtx.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/distance/distance_types.hpp>
+#include <raft/linalg/gemm.cuh>
+#include <raft/linalg/map.cuh>
+#include <raft/linalg/unary_op.cuh>
+#include <raft/matrix/detail/select_k.cuh>
+#include <raft/matrix/detail/select_warpsort.cuh>
+#include <raft/util/cuda_utils.cuh>
+#include <raft/util/device_atomics.cuh>
+#include <raft/util/device_loads_stores.cuh>
+#include <raft/util/pow2_utils.cuh>
+#include <raft/util/vectorized.cuh>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+#include <cub/cub.cuh>
+
+#include <cuda_fp16.h>
+
+#include <optional>
+
+namespace raft::neighbors::ivf_pq::detail {
+
+/** 8-bit floating-point storage type.
+ *
+ * A custom type for the current IVF-PQ implementation (unrelated to the proposed FP8 spec): no
+ * arithmetic, only conversion to and from fp32. If `Signed`, bit 0 of the byte holds the sign.
+ */
+template <uint32_t ExpBits, bool Signed>
+struct fp_8bit {
+  static_assert(ExpBits + uint8_t{Signed} <= 8, "The type does not fit in 8 bits.");
+  constexpr static uint32_t ExpMask = (1u << (ExpBits - 1u)) - 1u;  // NOLINT
+  constexpr static uint32_t ValBits = 8u - ExpBits;                 // NOLINT
+
+ public:
+  uint8_t bitstring;  // the raw encoded byte
+
+  HDI explicit fp_8bit(uint8_t bs) : bitstring(bs) {}
+  HDI explicit fp_8bit(float fp) : fp_8bit(float2fp_8bit(fp).bitstring) {}
+  HDI auto operator=(float fp) -> fp_8bit<ExpBits, Signed>&
+  {
+    bitstring = float2fp_8bit(fp).bitstring;
+    return *this;
+  }
+  HDI explicit operator float() const { return fp_8bit2float(*this); }
+  HDI explicit operator half() const { return half(fp_8bit2float(*this)); }
+
+ private:
+  static constexpr float kMin = 1.0f / float(1u << ExpMask);  // unsigned path: v < kMin -> 0
+  static constexpr float kMax = float(1u << (ExpMask + 1)) * (2.0f - 1.0f / float(1u << ValBits));
+
+  static HDI auto float2fp_8bit(float v) -> fp_8bit<ExpBits, Signed>
+  {
+    if constexpr (Signed) {
+      auto u = fp_8bit<ExpBits, false>(std::abs(v)).bitstring;  // encode |v| as unsigned
+      u      = (u & 0xfeu) | uint8_t{v < 0};  // overwrite bit 0 with the sign flag
+      return fp_8bit<ExpBits, true>(u);
+    } else {
+      // sic! all inputs below kMin (including all negative numbers) are truncated to zero.
+      if (v < kMin) { return fp_8bit<ExpBits, false>{static_cast<uint8_t>(0)}; }
+      // protect from overflow: inputs at or above kMax saturate to 0xff
+      if (v >= kMax) { return fp_8bit<ExpBits, false>{static_cast<uint8_t>(0xffu)}; }
+      // remaining values: rebias the exponent (127 -> ExpMask) and drop the low mantissa bits
+      return fp_8bit<ExpBits, false>{static_cast<uint8_t>(
+        (*reinterpret_cast<uint32_t*>(&v) + (ExpMask << 23u) - 0x3f800000u) >> (15u + ExpBits))};
+    }
+  }
+
+  static HDI auto fp_8bit2float(const fp_8bit<ExpBits, Signed>& v) -> float
+  {
+    uint32_t u = v.bitstring;
+    if constexpr (Signed) {
+      u &= ~1;  // clear bit 0, which holds the sign
+    }
+    float r;  // bits = shifted code + (127 - ExpMask) exponent rebias + (1 << (22 - ValBits))
+    *reinterpret_cast<uint32_t*>(&r) =
+      ((u << (15u + ExpBits)) + (0x3f800000u | (0x00400000u >> ValBits)) - (ExpMask << 23));
+    if constexpr (Signed) {  // reapply the sign stored in bit 0
+      if (v.bitstring & 1) { r = -r; }
+    }
+    return r;
+  }
+};
+
+}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
index 8ddbe7fac0..0aa2862cf4 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
@@ -18,6 +18,9 @@
 
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
+#include <raft/neighbors/detail/ivf_pq_compute_similarity.cuh>
+#include <raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>
 
 #include <raft/core/cudart_utils.hpp>
@@ -49,79 +52,8 @@
 
 namespace raft::neighbors::ivf_pq::detail {
 
-/**
- * Maximum value of k for the fused calculate & select in ivfpq.
- *
- * If runtime value of k is larger than this, the main search operation
- * is split into two kernels (per batch, first calculate distance, then select top-k).
- */
-static constexpr int kMaxCapacity = 128;
-static_assert((kMaxCapacity >= 32) && !(kMaxCapacity & (kMaxCapacity - 1)),
-              "kMaxCapacity must be a power of two, not smaller than the WarpSize.");
-
 using namespace raft::spatial::knn::detail;  // NOLINT
 
-/** 8-bit floating-point storage type.
- *
- * This is a custom type for the current IVF-PQ implementation. No arithmetic operations defined
- * only conversion to and from fp32. This type is unrelated to the proposed FP8 specification.
- */
-template <uint32_t ExpBits, bool Signed>
-struct fp_8bit {
-  static_assert(ExpBits + uint8_t{Signed} <= 8, "The type does not fit in 8 bits.");
-  constexpr static uint32_t ExpMask = (1u << (ExpBits - 1u)) - 1u;  // NOLINT
-  constexpr static uint32_t ValBits = 8u - ExpBits;                 // NOLINT
-
- public:
-  uint8_t bitstring;
-
-  HDI explicit fp_8bit(uint8_t bs) : bitstring(bs) {}
-  HDI explicit fp_8bit(float fp) : fp_8bit(float2fp_8bit(fp).bitstring) {}
-  HDI auto operator=(float fp) -> fp_8bit<ExpBits, Signed>&
-  {
-    bitstring = float2fp_8bit(fp).bitstring;
-    return *this;
-  }
-  HDI explicit operator float() const { return fp_8bit2float(*this); }
-  HDI explicit operator half() const { return half(fp_8bit2float(*this)); }
-
- private:
-  static constexpr float kMin = 1.0f / float(1u << ExpMask);
-  static constexpr float kMax = float(1u << (ExpMask + 1)) * (2.0f - 1.0f / float(1u << ValBits));
-
-  static HDI auto float2fp_8bit(float v) -> fp_8bit<ExpBits, Signed>
-  {
-    if constexpr (Signed) {
-      auto u = fp_8bit<ExpBits, false>(std::abs(v)).bitstring;
-      u      = (u & 0xfeu) | uint8_t{v < 0};  // set the sign bit
-      return fp_8bit<ExpBits, true>(u);
-    } else {
-      // sic! all small and negative numbers are truncated to zero.
-      if (v < kMin) { return fp_8bit<ExpBits, false>{static_cast<uint8_t>(0)}; }
-      // protect from overflow
-      if (v >= kMax) { return fp_8bit<ExpBits, false>{static_cast<uint8_t>(0xffu)}; }
-      // the rest of possible float values should be within the normalized range
-      return fp_8bit<ExpBits, false>{static_cast<uint8_t>(
-        (*reinterpret_cast<uint32_t*>(&v) + (ExpMask << 23u) - 0x3f800000u) >> (15u + ExpBits))};
-    }
-  }
-
-  static HDI auto fp_8bit2float(const fp_8bit<ExpBits, Signed>& v) -> float
-  {
-    uint32_t u = v.bitstring;
-    if constexpr (Signed) {
-      u &= ~1;  // zero the sign bit
-    }
-    float r;
-    *reinterpret_cast<uint32_t*>(&r) =
-      ((u << (15u + ExpBits)) + (0x3f800000u | (0x00400000u >> ValBits)) - (ExpMask << 23));
-    if constexpr (Signed) {  // recover the sign bit
-      if (v.bitstring & 1) { r = -r; }
-    }
-    return r;
-  }
-};
-
 /**
  * Select the clusters to probe and, as a side-effect, translate the queries type `T -> float`
  *
@@ -439,464 +371,6 @@ void postprocess_distances(float* out,        // [n_queries, topk]
   }
 }
 
-template <typename T, typename IdxT>
-struct dummy_block_sort_t {
-  using queue_t = matrix::detail::select::warpsort::warp_sort_distributed<WarpSize, true, T, IdxT>;
-  template <typename... Args>
-  __device__ dummy_block_sort_t(int k, Args...){};
-};
-
-template <int Capacity, typename T, typename IdxT>
-struct pq_block_sort {
-  using type = matrix::detail::select::warpsort::
-    block_sort<matrix::detail::select::warpsort::warp_sort_distributed, Capacity, true, T, IdxT>;
-};
-
-template <typename T, typename IdxT>
-struct pq_block_sort<0, T, IdxT> : dummy_block_sort_t<T, IdxT> {
-  using type = dummy_block_sort_t<T, IdxT>;
-};
-
-template <int Capacity, typename T, typename IdxT>
-using block_sort_t = typename pq_block_sort<Capacity, T, IdxT>::type;
-
-/* Manually unrolled loop over a chunk of pq_dataset that fits into one VecT. */
-template <typename OutT,
-          typename LutT,
-          typename VecT,
-          bool CheckBounds,
-          uint32_t PqBits,
-          uint32_t BitsLeft = 0,
-          uint32_t Ix       = 0>
-__device__ __forceinline__ void ivfpq_compute_chunk(OutT& score /* NOLINT */,
-                                                    typename VecT::math_t& pq_code,
-                                                    const VecT& pq_codes,
-                                                    const LutT*& lut_head,
-                                                    const LutT*& lut_end)
-{
-  if constexpr (CheckBounds) {
-    if (lut_head >= lut_end) { return; }
-  }
-  constexpr uint32_t kTotalBits = 8 * sizeof(typename VecT::math_t);
-  constexpr uint32_t kPqShift   = 1u << PqBits;
-  constexpr uint32_t kPqMask    = kPqShift - 1u;
-  if constexpr (BitsLeft >= PqBits) {
-    uint8_t code = pq_code & kPqMask;
-    pq_code >>= PqBits;
-    score += OutT(lut_head[code]);
-    lut_head += kPqShift;
-    return ivfpq_compute_chunk<OutT, LutT, VecT, CheckBounds, PqBits, BitsLeft - PqBits, Ix>(
-      score, pq_code, pq_codes, lut_head, lut_end);
-  } else if constexpr (Ix < VecT::Ratio) {
-    uint8_t code                = pq_code;
-    pq_code                     = pq_codes.val.data[Ix];
-    constexpr uint32_t kRemBits = PqBits - BitsLeft;
-    constexpr uint32_t kRemMask = (1u << kRemBits) - 1u;
-    code |= (pq_code & kRemMask) << BitsLeft;
-    pq_code >>= kRemBits;
-    score += OutT(lut_head[code]);
-    lut_head += kPqShift;
-    return ivfpq_compute_chunk<OutT,
-                               LutT,
-                               VecT,
-                               CheckBounds,
-                               PqBits,
-                               kTotalBits - kRemBits,
-                               Ix + 1>(score, pq_code, pq_codes, lut_head, lut_end);
-  }
-}
-
-/* Compute the similarity for one vector in the pq_dataset */
-template <typename OutT, typename LutT, typename VecT, uint32_t PqBits>
-__device__ auto ivfpq_compute_score(uint32_t pq_dim,
-                                    const typename VecT::io_t* pq_head,
-                                    const LutT* lut_scores,
-                                    OutT early_stop_limit) -> OutT
-{
-  constexpr uint32_t kChunkSize = sizeof(VecT) * 8u / PqBits;
-  auto lut_head                 = lut_scores;
-  auto lut_end                  = lut_scores + (pq_dim << PqBits);
-  VecT pq_codes;
-  OutT score{0};
-  for (; pq_dim >= kChunkSize; pq_dim -= kChunkSize) {
-    *pq_codes.vectorized_data() = *pq_head;
-    pq_head += kIndexGroupSize;
-    typename VecT::math_t pq_code = 0;
-    ivfpq_compute_chunk<OutT, LutT, VecT, false, PqBits>(
-      score, pq_code, pq_codes, lut_head, lut_end);
-    // Early stop when it makes sense (otherwise early_stop_limit is kDummy/infinity).
-    if (score >= early_stop_limit) { return score; }
-  }
-  if (pq_dim > 0) {
-    *pq_codes.vectorized_data()   = *pq_head;
-    typename VecT::math_t pq_code = 0;
-    ivfpq_compute_chunk<OutT, LutT, VecT, true, PqBits>(
-      score, pq_code, pq_codes, lut_head, lut_end);
-  }
-  return score;
-}
-
-/**
- * The main kernel that computes similarity scores across multiple queries and probes.
- * When `Capacity > 0`, it also selects top K candidates for each query and probe
- * (which need to be merged across probes afterwards).
- *
- * Each block processes a (query, probe) pair: it calculates the distance between the single query
- * vector and all the dataset vector in the cluster that we are probing.
- *
- * @tparam OutT
- *   The output type - distances.
- * @tparam LutT
- *   The lookup table element type (lut_scores).
- * @tparam PqBits
- *   The bit length of an encoded vector element after compression by PQ
- *   (NB: pq_book_size = 1 << PqBits).
- * @tparam Capacity
- *   Power-of-two; the maximum possible `k` in top-k. Value zero disables fused top-k search.
- * @tparam PrecompBaseDiff
- *   Defines whether we should precompute part of the distance and keep it in shared memory
- *   before the main part (score calculation) to increase memory usage efficiency in the latter.
- *   For L2, this is the distance between the query and the cluster center.
- * @tparam EnableSMemLut
- *   Defines whether to use the shared memory for the lookup table (`lut_scores`).
- *   Setting this to `false` allows to reduce the shared memory usage (and maximum data dim)
- *   at the cost of reducing global memory reading throughput.
- *
- * @param n_rows the number of records in the dataset
- * @param dim the dimensionality of the data (NB: after rotation transform, i.e. `index.rot_dim()`).
- * @param n_probes the number of clusters to search for each query
- * @param pq_dim
- *   The dimensionality of an encoded vector after compression by PQ.
- * @param n_queries the number of queries.
- * @param metric the distance type.
- * @param codebook_kind Defines the way PQ codebooks have been trained.
- * @param topk the `k` in the select top-k.
- * @param max_samples the size of the output for a single query.
- * @param cluster_centers
- *   The device pointer to the cluster centers in the original space (NB: after rotation)
- *   [n_clusters, dim].
- * @param pq_centers
- *   The device pointer to the cluster centers in the PQ space
- *   [pq_dim, pq_book_size, pq_len] or [n_clusters, pq_book_size, pq_len,].
- * @param pq_dataset
- *   The device pointer to the PQ index (data) [n_rows, ...].
- * @param cluster_labels
- *   The device pointer to the labels (clusters) for each query and probe [n_queries, n_probes].
- * @param _chunk_indices
- *   The device pointer to the data offsets for each query and probe [n_queries, n_probes].
- * @param queries
- *   The device pointer to the queries (NB: after rotation) [n_queries, dim].
- * @param index_list
- *   An optional device pointer to the enforced order of search [n_queries, n_probes].
- *   One can pass reordered indices here to try to improve data reading locality.
- * @param lut_scores
- *   The device pointer for storing the lookup table globally [gridDim.x, pq_dim << PqBits].
- *   Ignored when `EnableSMemLut == true`.
- * @param _out_scores
- *   The device pointer to the output scores
- *   [n_queries, max_samples] or [n_queries, n_probes, topk].
- * @param _out_indices
- *   The device pointer to the output indices [n_queries, n_probes, topk].
- *   These are the indices of the records as they appear in the database view formed by the probed
- *   clusters / defined by the `_chunk_indices`.
- *   The indices can have values within the range [0, max_samples).
- *   Ignored  when `Capacity == 0`.
- */
-template <typename OutT,
-          typename LutT,
-          uint32_t PqBits,
-          int Capacity,
-          bool PrecompBaseDiff,
-          bool EnableSMemLut>
-__global__ void compute_similarity_kernel(uint32_t n_rows,
-                                          uint32_t dim,
-                                          uint32_t n_probes,
-                                          uint32_t pq_dim,
-                                          uint32_t n_queries,
-                                          distance::DistanceType metric,
-                                          codebook_gen codebook_kind,
-                                          uint32_t topk,
-                                          uint32_t max_samples,
-                                          const float* cluster_centers,
-                                          const float* pq_centers,
-                                          const uint8_t* const* pq_dataset,
-                                          const uint32_t* cluster_labels,
-                                          const uint32_t* _chunk_indices,
-                                          const float* queries,
-                                          const uint32_t* index_list,
-                                          float* query_kths,
-                                          LutT* lut_scores,
-                                          OutT* _out_scores,
-                                          uint32_t* _out_indices)
-{
-  /* Shared memory:
-
-    * lut_scores: lookup table (LUT) of size = `pq_dim << PqBits`  (when EnableSMemLut)
-    * base_diff: size = dim (which is equal to `pq_dim * pq_len`)  or dim*2
-    * topk::block_sort: some amount of shared memory, but overlaps with the rest:
-        block_sort only needs shared memory for `.done()` operation, which can come very last.
-  */
-  extern __shared__ __align__(256) uint8_t smem_buf[];  // NOLINT
-  constexpr bool kManageLocalTopK = Capacity > 0;
-
-  constexpr uint32_t PqShift = 1u << PqBits;  // NOLINT
-  constexpr uint32_t PqMask  = PqShift - 1u;  // NOLINT
-
-  const uint32_t pq_len   = dim / pq_dim;
-  const uint32_t lut_size = pq_dim * PqShift;
-
-  if constexpr (EnableSMemLut) {
-    lut_scores = reinterpret_cast<LutT*>(smem_buf);
-  } else {
-    lut_scores += lut_size * blockIdx.x;
-  }
-
-  float* base_diff = nullptr;
-  if constexpr (PrecompBaseDiff) {
-    if constexpr (EnableSMemLut) {
-      base_diff = reinterpret_cast<float*>(lut_scores + lut_size);
-    } else {
-      base_diff = reinterpret_cast<float*>(smem_buf);
-    }
-  }
-
-  for (int ib = blockIdx.x; ib < n_queries * n_probes; ib += gridDim.x) {
-    if (ib >= gridDim.x) {
-      // sync shared memory accesses on the second and further iterations
-      __syncthreads();
-    }
-    uint32_t query_ix;
-    uint32_t probe_ix;
-    if (index_list == nullptr) {
-      query_ix = ib % n_queries;
-      probe_ix = ib / n_queries;
-    } else {
-      auto ordered_ix = index_list[ib];
-      query_ix        = ordered_ix / n_probes;
-      probe_ix        = ordered_ix % n_probes;
-    }
-
-    const uint32_t* chunk_indices = _chunk_indices + (n_probes * query_ix);
-    const float* query            = queries + (dim * query_ix);
-    OutT* out_scores;
-    uint32_t* out_indices = nullptr;
-    if constexpr (kManageLocalTopK) {
-      // Store topk calculated distances to out_scores (and its indices to out_indices)
-      out_scores  = _out_scores + topk * (probe_ix + (n_probes * query_ix));
-      out_indices = _out_indices + topk * (probe_ix + (n_probes * query_ix));
-    } else {
-      // Store all calculated distances to out_scores
-      out_scores = _out_scores + max_samples * query_ix;
-    }
-    uint32_t label              = cluster_labels[n_probes * query_ix + probe_ix];
-    const float* cluster_center = cluster_centers + (dim * label);
-    const float* pq_center;
-    if (codebook_kind == codebook_gen::PER_SUBSPACE) {
-      pq_center = pq_centers;
-    } else {
-      pq_center = pq_centers + (pq_len << PqBits) * label;
-    }
-
-    if constexpr (PrecompBaseDiff) {
-      // Reduce number of memory reads later by pre-computing parts of the score
-      switch (metric) {
-        case distance::DistanceType::L2SqrtExpanded:
-        case distance::DistanceType::L2Expanded: {
-          for (uint32_t i = threadIdx.x; i < dim; i += blockDim.x) {
-            base_diff[i] = query[i] - cluster_center[i];
-          }
-        } break;
-        case distance::DistanceType::InnerProduct: {
-          float2 pvals;
-          for (uint32_t i = threadIdx.x; i < dim; i += blockDim.x) {
-            pvals.x                                 = query[i];
-            pvals.y                                 = cluster_center[i] * pvals.x;
-            reinterpret_cast<float2*>(base_diff)[i] = pvals;
-          }
-        } break;
-        default: __builtin_unreachable();
-      }
-      __syncthreads();
-    }
-
-    {
-      // Create a lookup table
-      // For each subspace, the lookup table stores the distance between the actual query vector
-      // (projected into the subspace) and all possible pq vectors in that subspace.
-      for (uint32_t i = threadIdx.x; i < lut_size; i += blockDim.x) {
-        const uint32_t i_pq  = i >> PqBits;
-        uint32_t j           = i_pq * pq_len;
-        const uint32_t j_end = pq_len + j;
-        auto cur_pq_center   = pq_center + (i & PqMask) +
-                             (codebook_kind == codebook_gen::PER_SUBSPACE ? j * PqShift : 0u);
-        float score = 0.0;
-        do {
-          float pq_c = *cur_pq_center;
-          cur_pq_center += PqShift;
-          switch (metric) {
-            case distance::DistanceType::L2SqrtExpanded:
-            case distance::DistanceType::L2Expanded: {
-              float diff;
-              if constexpr (PrecompBaseDiff) {
-                diff = base_diff[j];
-              } else {
-                diff = query[j] - cluster_center[j];
-              }
-              diff -= pq_c;
-              score += diff * diff;
-            } break;
-            case distance::DistanceType::InnerProduct: {
-              // NB: we negate the scores as we hardcoded select-topk to always compute the minimum
-              float q;
-              if constexpr (PrecompBaseDiff) {
-                float2 pvals = reinterpret_cast<float2*>(base_diff)[j];
-                q            = pvals.x;
-                score -= pvals.y;
-              } else {
-                q = query[j];
-                score -= q * cluster_center[j];
-              }
-              score -= q * pq_c;
-            } break;
-            default: __builtin_unreachable();
-          }
-        } while (++j < j_end);
-        lut_scores[i] = LutT(score);
-      }
-    }
-
-    // Define helper types for efficient access to the pq_dataset, which is stored in an interleaved
-    // format. The chunks of PQ data are stored in kIndexGroupVecLen-bytes-long chunks, interleaved
-    // in groups of kIndexGroupSize elems (which is normally equal to the warp size) for the fastest
-    // possible access by thread warps.
-    //
-    // Consider one record in the pq_dataset is `pq_dim * pq_bits`-bit-long.
-    // Assuming `kIndexGroupVecLen = 16`, one chunk of data read by a thread at once is 128-bits.
-    // Then, such a chunk contains `chunk_size = 128 / pq_bits` record elements, and the record
-    // consists of `ceildiv(pq_dim, chunk_size)` chunks. The chunks are interleaved in groups of 32,
-    // so that the warp can achieve the best coalesced read throughput.
-    using group_align  = Pow2<kIndexGroupSize>;
-    using vec_align    = Pow2<kIndexGroupVecLen>;
-    using local_topk_t = block_sort_t<Capacity, OutT, uint32_t>;
-    using op_t         = uint32_t;
-    using vec_t        = TxN_t<op_t, kIndexGroupVecLen / sizeof(op_t)>;
-
-    uint32_t sample_offset = 0;
-    if (probe_ix > 0) { sample_offset = chunk_indices[probe_ix - 1]; }
-    uint32_t n_samples            = chunk_indices[probe_ix] - sample_offset;
-    uint32_t n_samples_aligned    = group_align::roundUp(n_samples);
-    constexpr uint32_t kChunkSize = (kIndexGroupVecLen * 8u) / PqBits;
-    uint32_t pq_line_width        = div_rounding_up_unsafe(pq_dim, kChunkSize) * kIndexGroupVecLen;
-    auto pq_thread_data = pq_dataset[label] + group_align::roundDown(threadIdx.x) * pq_line_width +
-                          group_align::mod(threadIdx.x) * vec_align::Value;
-    pq_line_width *= blockDim.x;
-
-    constexpr OutT kDummy = upper_bound<OutT>();
-    OutT query_kth        = kDummy;
-    if constexpr (kManageLocalTopK) { query_kth = OutT(query_kths[query_ix]); }
-    local_topk_t block_topk(topk, nullptr, query_kth);
-    OutT early_stop_limit = kDummy;
-    switch (metric) {
-      // If the metric is non-negative, we can use the query_kth approximation as an early stop
-      // threshold to skip some iterations when computing the score. Add such metrics here.
-      case distance::DistanceType::L2SqrtExpanded:
-      case distance::DistanceType::L2Expanded: {
-        early_stop_limit = query_kth;
-      } break;
-      default: break;
-    }
-
-    // Ensure lut_scores is written by all threads before using it in ivfpq-compute-score
-    __threadfence_block();
-    __syncthreads();
-
-    // Compute a distance for each sample
-    for (uint32_t i = threadIdx.x; i < n_samples_aligned;
-         i += blockDim.x, pq_thread_data += pq_line_width) {
-      OutT score = kDummy;
-      bool valid = i < n_samples;
-      if (valid) {
-        score = ivfpq_compute_score<OutT, LutT, vec_t, PqBits>(
-          pq_dim,
-          reinterpret_cast<const vec_t::io_t*>(pq_thread_data),
-          lut_scores,
-          early_stop_limit);
-      }
-      if constexpr (kManageLocalTopK) {
-        block_topk.add(score, sample_offset + i);
-      } else {
-        if (valid) { out_scores[sample_offset + i] = score; }
-      }
-    }
-    if constexpr (kManageLocalTopK) {
-      // sync threads before the topk merging operation, because we reuse smem_buf
-      __syncthreads();
-      block_topk.done(smem_buf);
-      block_topk.store(out_scores, out_indices);
-      if (threadIdx.x == 0) { atomicMin(query_kths + query_ix, float(out_scores[topk - 1])); }
-    } else {
-      // fill in the rest of the out_scores with dummy values
-      if (probe_ix + 1 == n_probes) {
-        for (uint32_t i = threadIdx.x + sample_offset + n_samples; i < max_samples;
-             i += blockDim.x) {
-          out_scores[i] = kDummy;
-        }
-      }
-    }
-  }
-}
-
-// The signature of the kernel defined by a minimal set of template parameters
-template <typename OutT, typename LutT>
-using compute_similarity_kernel_t =
-  decltype(&compute_similarity_kernel<OutT, LutT, 8, 0, true, true>);
-
-// The config struct lifts the runtime parameters to the template parameters
-template <typename OutT, typename LutT, bool PrecompBaseDiff, bool EnableSMemLut>
-struct compute_similarity_kernel_config {
- public:
-  static auto get(uint32_t pq_bits, uint32_t k_max) -> compute_similarity_kernel_t<OutT, LutT>
-  {
-    return kernel_choose_bits(pq_bits, k_max);
-  }
-
- private:
-  static auto kernel_choose_bits(uint32_t pq_bits, uint32_t k_max)
-    -> compute_similarity_kernel_t<OutT, LutT>
-  {
-    switch (pq_bits) {
-      case 4: return kernel_try_capacity<4, kMaxCapacity>(k_max);
-      case 5: return kernel_try_capacity<5, kMaxCapacity>(k_max);
-      case 6: return kernel_try_capacity<6, kMaxCapacity>(k_max);
-      case 7: return kernel_try_capacity<7, kMaxCapacity>(k_max);
-      case 8: return kernel_try_capacity<8, kMaxCapacity>(k_max);
-      default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
-    }
-  }
-
-  template <uint32_t PqBits, int Capacity>
-  static auto kernel_try_capacity(uint32_t k_max) -> compute_similarity_kernel_t<OutT, LutT>
-  {
-    if constexpr (Capacity > 0) {
-      if (k_max == 0 || k_max > Capacity) { return kernel_try_capacity<PqBits, 0>(k_max); }
-    }
-    if constexpr (Capacity > 1) {
-      if (k_max * 2 <= Capacity) { return kernel_try_capacity<PqBits, (Capacity / 2)>(k_max); }
-    }
-    return compute_similarity_kernel<OutT, LutT, PqBits, Capacity, PrecompBaseDiff, EnableSMemLut>;
-  }
-};
-
-// A standalone accessor function is necessary to make sure template specializations work correctly
-// (we "extern template" this function)
-template <typename OutT, typename LutT, bool PrecompBaseDiff, bool EnableSMemLut>
-auto get_compute_similarity_kernel(uint32_t pq_bits, uint32_t k_max)
-  -> compute_similarity_kernel_t<OutT, LutT>
-{
-  return compute_similarity_kernel_config<OutT, LutT, PrecompBaseDiff, EnableSMemLut>::get(pq_bits,
-                                                                                           k_max);
-}
-
 /**
  * An approximation to the number of times each cluster appears in a batched sample.
  *
@@ -930,318 +404,6 @@ constexpr inline auto expected_probe_coresidency(uint32_t n_clusters,
   return 1 + (n_queries - 1) * n_probes / (2 * n_clusters);
 }
 
-/**
- * Estimate a carveout value as expected by `cudaFuncAttributePreferredSharedMemoryCarveout`
- * (which does not take into account `reservedSharedMemPerBlock`),
- * given by a desired schmem-L1 split and a per-block memory requirement in bytes.
- *
- * NB: As per the programming guide, the memory carveout setting is just a hint for the driver; it's
- * free to choose any shmem-L1 configuration it deems appropriate. For example, if you set the
- * carveout to zero, it will choose a non-zero config that will allow to run at least one active
- * block per SM.
- *
- * @param shmem_fraction
- *   a fraction representing a desired split (shmem / (shmem + L1)) [0, 1].
- * @param shmem_per_block
- *   a shared memory usage per block (dynamic + static shared memory sizes), in bytes.
- * @param dev_props
- *   device properties.
- * @return
- *   a carveout value in percents [0, 100].
- */
-constexpr inline auto estimate_carveout(double shmem_fraction,
-                                        size_t shmem_per_block,
-                                        const cudaDeviceProp& dev_props) -> int
-{
-  using shmem_unit = Pow2<128>;
-  size_t m         = shmem_unit::roundUp(shmem_per_block);
-  size_t r         = dev_props.reservedSharedMemPerBlock;
-  size_t s         = dev_props.sharedMemPerMultiprocessor;
-  return (size_t(100 * s * m * shmem_fraction) - (m - 1) * r) / (s * (m + r));
-}
-
-/** Select an appropriate kernel instance and launch parameters. */
-template <typename OutT, typename LutT>
-struct compute_similarity {
-  /** Estimate the occupancy for the given kernel on the given device. */
-  struct occupancy_t {
-    using shmem_unit = Pow2<128>;
-
-    int blocks_per_sm = 0;
-    double occupancy  = 0.0;
-    double shmem_use  = 1.0;
-
-    inline occupancy_t() = default;
-    inline occupancy_t(size_t smem,
-                       uint32_t n_threads,
-                       compute_similarity_kernel_t<OutT, LutT> kernel,
-                       const cudaDeviceProp& dev_props)
-    {
-      RAFT_CUDA_TRY(
-        cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blocks_per_sm, kernel, n_threads, smem));
-      occupancy = double(blocks_per_sm * n_threads) / double(dev_props.maxThreadsPerMultiProcessor);
-      shmem_use = double(shmem_unit::roundUp(smem) * blocks_per_sm) /
-                  double(dev_props.sharedMemPerMultiprocessor);
-    }
-  };
-
-  struct selected {
-    compute_similarity_kernel_t<OutT, LutT> kernel;
-    dim3 grid_dim;
-    dim3 block_dim;
-    size_t smem_size;
-    size_t device_lut_size;
-
-    template <typename... Args>
-    void operator()(rmm::cuda_stream_view stream, Args... args)
-    {
-      kernel<<<grid_dim, block_dim, smem_size, stream>>>(args...);
-      RAFT_CHECK_CUDA(stream);
-    }
-  };
-
-  /**
-   * Use heuristics to choose an optimal instance of the search kernel.
-   * It selects among a few kernel variants (with/out using shared mem for
-   * lookup tables / precomputed distances) and tries to choose the block size
-   * to maximize kernel occupancy.
-   *
-   * @param manage_local_topk
-   *    whether use the fused calculate+select or just calculate the distances for each
-   *    query and probed cluster.
-   *
-   * @param locality_hint
-   *    beyond this limit do not consider increasing the number of active blocks per SM
-   *    would improve locality anymore.
-   */
-  static inline auto select(const cudaDeviceProp& dev_props,
-                            bool manage_local_topk,
-                            int locality_hint,
-                            double preferred_shmem_carveout,
-                            uint32_t pq_bits,
-                            uint32_t pq_dim,
-                            uint32_t precomp_data_count,
-                            uint32_t n_queries,
-                            uint32_t n_probes,
-                            uint32_t topk) -> selected
-  {
-    // Shared memory for storing the lookup table
-    size_t lut_mem = sizeof(LutT) * (pq_dim << pq_bits);
-    // Shared memory for storing pre-computed pieces to speedup the lookup table construction
-    // (e.g. the distance between a cluster center and the query for L2).
-    size_t bdf_mem = sizeof(float) * precomp_data_count;
-    // Shared memory for the fused top-k component; it may overlap with the other uses of shared
-    // memory and depends on the number of threads.
-    struct ltk_mem_t {
-      uint32_t subwarp_size;
-      uint32_t topk;
-      bool manage_local_topk;
-      ltk_mem_t(bool manage_local_topk, uint32_t topk)
-        : manage_local_topk(manage_local_topk), topk(topk)
-      {
-        subwarp_size = WarpSize;
-        while (topk * 2 <= subwarp_size) {
-          subwarp_size /= 2;
-        }
-      }
-
-      [[nodiscard]] auto operator()(uint32_t n_threads) const -> size_t
-      {
-        return manage_local_topk ? matrix::detail::select::warpsort::
-                                     template calc_smem_size_for_block_wide<OutT, uint32_t>(
-                                       n_threads / subwarp_size, topk)
-                                 : 0;
-      }
-    } ltk_mem{manage_local_topk, topk};
-
-    // Total amount of work; should be enough to occupy the GPU.
-    uint32_t n_blocks = n_queries * n_probes;
-
-    // The minimum block size we may want:
-    //   1. It's a power-of-two for efficient L1 caching of pq_centers values
-    //      (multiples of `1 << pq_bits`).
-    //   2. It should be large enough to fully utilize an SM.
-    uint32_t n_threads_min = WarpSize;
-    while (dev_props.maxBlocksPerMultiProcessor * int(n_threads_min) <
-           dev_props.maxThreadsPerMultiProcessor) {
-      n_threads_min *= 2;
-    }
-    // Further increase the minimum block size to make sure full device occupancy
-    // (NB: this may lead to `n_threads_min` being larger than the kernel's maximum)
-    while (int(n_blocks * n_threads_min) <
-             dev_props.multiProcessorCount * dev_props.maxThreadsPerMultiProcessor &&
-           int(n_threads_min) < dev_props.maxThreadsPerBlock) {
-      n_threads_min *= 2;
-    }
-    // Even further, increase it to allow less blocks per SM if there not enough queries.
-    // With this, we reduce the chance of different clusters being processed by two blocks
-    // on the same SM and thus improve the data locality for L1 caching.
-    while (int(n_queries * n_threads_min) < dev_props.maxThreadsPerMultiProcessor &&
-           int(n_threads_min) < dev_props.maxThreadsPerBlock) {
-      n_threads_min *= 2;
-    }
-
-    // Granularity of changing the number of threads when computing the maximum block size.
-    // It's good to have it multiple of the PQ book width.
-    uint32_t n_threads_gty = round_up_safe<uint32_t>(1u << pq_bits, WarpSize);
-
-    /*
-     Shared memory / L1 cache balance is the main limiter of this kernel.
-     The more blocks per SM we launch, the more shared memory we need. Besides that, we have
-     three versions of the kernel varying in performance and shmem usage.
-
-     We try the most demanding and the fastest kernel first, trying to maximize occupancy with
-     the minimum number of blocks (just one, really). Then, we tweak the `n_threads` to further
-     optimize occupancy and data locality for the L1 cache.
-     */
-    auto conf_fast        = get_compute_similarity_kernel<OutT, LutT, true, true>;
-    auto conf_no_basediff = get_compute_similarity_kernel<OutT, LutT, false, true>;
-    auto conf_no_smem_lut = get_compute_similarity_kernel<OutT, LutT, true, false>;
-    auto topk_or_zero     = manage_local_topk ? topk : 0u;
-    std::array candidates{
-      std::make_tuple(conf_fast(pq_bits, topk_or_zero), lut_mem + bdf_mem, true),
-      std::make_tuple(conf_no_basediff(pq_bits, topk_or_zero), lut_mem, true),
-      std::make_tuple(conf_no_smem_lut(pq_bits, topk_or_zero), bdf_mem, false)};
-
-    // we may allow slightly lower than 100% occupancy;
-    constexpr double kTargetOccupancy = 0.75;
-    // This struct is used to select the better candidate
-    occupancy_t selected_perf{};
-    selected selected_config;
-    for (auto [kernel, smem_size_const, lut_is_in_shmem] : candidates) {
-      if (smem_size_const > dev_props.sharedMemPerBlockOptin) {
-        // Even a single block cannot fit into an SM due to shmem requirements. Skip the candidate.
-        continue;
-      }
-
-      // First, we set the carveout hint to the preferred value. The driver will increase this if
-      // needed to run at least one block per SM. At the same time, if more blocks fit into one SM,
-      // this carveout value will limit the calculated occupancy. When we're done selecting the best
-      // launch configuration, we will tighten the carveout once more, based on the final memory
-      // usage and occupancy.
-      const int max_carveout =
-        estimate_carveout(preferred_shmem_carveout, smem_size_const, dev_props);
-      RAFT_CUDA_TRY(
-        cudaFuncSetAttribute(kernel, cudaFuncAttributePreferredSharedMemoryCarveout, max_carveout));
-
-      // Get the theoretical maximum possible number of threads per block
-      cudaFuncAttributes kernel_attrs;
-      RAFT_CUDA_TRY(cudaFuncGetAttributes(&kernel_attrs, kernel));
-      uint32_t n_threads =
-        round_down_safe<uint32_t>(kernel_attrs.maxThreadsPerBlock, n_threads_gty);
-
-      // Actual required shmem depens on the number of threads
-      size_t smem_size = max(smem_size_const, ltk_mem(n_threads));
-
-      // Make sure the kernel can get enough shmem.
-      cudaError_t cuda_status =
-        cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
-      if (cuda_status != cudaSuccess) {
-        RAFT_EXPECTS(
-          cuda_status == cudaGetLastError(),
-          "Tried to reset the expected cuda error code, but it didn't match the expectation");
-        // Failed to request enough shmem for the kernel. Skip the candidate.
-        continue;
-      }
-
-      occupancy_t cur(smem_size, n_threads, kernel, dev_props);
-      if (cur.blocks_per_sm <= 0) {
-        // For some reason, we still cannot make this kernel run. Skip the candidate.
-        continue;
-      }
-
-      {
-        // Try to reduce the number of threads to increase occupancy and data locality
-        auto n_threads_tmp = n_threads_min;
-        while (n_threads_tmp * 2 < n_threads) {
-          n_threads_tmp *= 2;
-        }
-        if (n_threads_tmp < n_threads) {
-          while (n_threads_tmp >= n_threads_min) {
-            auto smem_size_tmp = max(smem_size_const, ltk_mem(n_threads_tmp));
-            occupancy_t tmp(smem_size_tmp, n_threads_tmp, kernel, dev_props);
-            bool select_it = false;
-            if (lut_is_in_shmem && locality_hint >= tmp.blocks_per_sm) {
-              // Normally, the smaller the block the better for L1 cache hit rate.
-              // Hence, the occupancy should be "just good enough"
-              select_it = tmp.occupancy >= min(kTargetOccupancy, cur.occupancy);
-            } else if (lut_is_in_shmem) {
-              // If we don't have enough repeating probes (locality_hint < tmp.blocks_per_sm),
-              // the locality is not going to improve with increasing the number of blocks per SM.
-              // Hence, the only metric here is the occupancy.
-              bool improves_occupancy = tmp.occupancy > cur.occupancy;
-              // Otherwise, the performance still improves with a smaller block size,
-              // given there is enough work to do
-              bool improves_parallelism =
-                tmp.occupancy == cur.occupancy &&
-                7u * tmp.blocks_per_sm * dev_props.multiProcessorCount <= n_blocks;
-              select_it = improves_occupancy || improves_parallelism;
-            } else {
-              // If we don't use shared memory for the lookup table, increasing the number of blocks
-              // is very taxing on the global memory usage.
-              // In this case, the occupancy must increase a lot to make it worth the cost.
-              select_it = tmp.occupancy >= min(1.0, cur.occupancy / kTargetOccupancy);
-            }
-            if (select_it) {
-              n_threads = n_threads_tmp;
-              smem_size = smem_size_tmp;
-              cur       = tmp;
-            }
-            n_threads_tmp /= 2;
-          }
-        }
-      }
-
-      {
-        if (selected_perf.occupancy <= 0.0                 // no candidate yet
-            || (selected_perf.occupancy < cur.occupancy * kTargetOccupancy &&
-                selected_perf.shmem_use >= cur.shmem_use)  // much improved occupancy
-        ) {
-          selected_perf = cur;
-          if (lut_is_in_shmem) {
-            selected_config = {
-              kernel, dim3(n_blocks, 1, 1), dim3(n_threads, 1, 1), smem_size, size_t(0)};
-          } else {
-            // When the global memory is used for the lookup table, we need to minimize the grid
-            // size; otherwise, the kernel may quickly run out of memory.
-            auto n_blocks_min =
-              std::min<uint32_t>(n_blocks, cur.blocks_per_sm * dev_props.multiProcessorCount);
-            selected_config = {kernel,
-                               dim3(n_blocks_min, 1, 1),
-                               dim3(n_threads, 1, 1),
-                               smem_size,
-                               size_t(n_blocks_min) * size_t(pq_dim << pq_bits)};
-          }
-          // Actual shmem/L1 split wildly rounds up the specified preferred carveout, so we set here
-          // a rather conservative bar; most likely, the kernel gets more shared memory than this,
-          // and the occupancy doesn't get hurt.
-          auto carveout = std::min<int>(max_carveout, std::ceil(100.0 * cur.shmem_use));
-          RAFT_CUDA_TRY(
-            cudaFuncSetAttribute(kernel, cudaFuncAttributePreferredSharedMemoryCarveout, carveout));
-          if (cur.occupancy >= kTargetOccupancy) { break; }
-        } else if (selected_perf.occupancy > 0.0) {
-          // If we found a reasonable candidate on a previous iteration, and this one is not better,
-          // then don't try any more candidates because they are much slower anyway.
-          break;
-        }
-      }
-    }
-
-    RAFT_EXPECTS(selected_perf.occupancy > 0.0,
-                 "Couldn't determine a working kernel launch configuration.");
-
-    return selected_config;
-  }
-};
-
-inline auto is_local_topk_feasible(uint32_t k, uint32_t n_probes, uint32_t n_queries) -> bool
-{
-  if (k > kMaxCapacity) { return false; }             // warp_sort not possible
-  if (n_probes <= 16) { return false; }               // too few clusters
-  if (n_queries * n_probes <= 256) { return false; }  // overall amount of work is too small
-  return true;
-}
-
 /**
  * The "main part" of the search, which assumes that outer-level `search` has already:
  *
@@ -1364,16 +526,16 @@ void ivfpq_search_worker(raft::device_resources const& handle,
     } break;
   }
 
-  auto search_instance = compute_similarity<ScoreT, LutT>::select(handle.get_device_properties(),
-                                                                  manage_local_topk,
-                                                                  coresidency,
-                                                                  preferred_shmem_carveout,
-                                                                  index.pq_bits(),
-                                                                  index.pq_dim(),
-                                                                  precomp_data_count,
-                                                                  n_queries,
-                                                                  n_probes,
-                                                                  topK);
+  auto search_instance = compute_similarity_select<ScoreT, LutT>(handle.get_device_properties(),
+                                                                 manage_local_topk,
+                                                                 coresidency,
+                                                                 preferred_shmem_carveout,
+                                                                 index.pq_bits(),
+                                                                 index.pq_dim(),
+                                                                 precomp_data_count,
+                                                                 n_queries,
+                                                                 n_probes,
+                                                                 topK);
 
   rmm::device_uvector<LutT> device_lut(search_instance.device_lut_size, stream, mr);
   std::optional<device_vector<float>> query_kths_buf{std::nullopt};
@@ -1386,27 +548,28 @@ void ivfpq_search_worker(raft::device_resources const& handle,
                 raft::const_op<float>{dummy_block_sort_t<ScoreT, IdxT>::queue_t::kDummy});
     query_kths = query_kths_buf->data_handle();
   }
-  search_instance(stream,
-                  index.size(),
-                  index.rot_dim(),
-                  n_probes,
-                  index.pq_dim(),
-                  n_queries,
-                  index.metric(),
-                  index.codebook_kind(),
-                  topK,
-                  max_samples,
-                  index.centers_rot().data_handle(),
-                  index.pq_centers().data_handle(),
-                  index.data_ptrs().data_handle(),
-                  clusters_to_probe,
-                  chunk_index.data(),
-                  query,
-                  index_list_sorted,
-                  query_kths,
-                  device_lut.data(),
-                  distances_buf.data(),
-                  neighbors_ptr);
+  compute_similarity_run(search_instance,
+                         stream,
+                         index.size(),
+                         index.rot_dim(),
+                         n_probes,
+                         index.pq_dim(),
+                         n_queries,
+                         index.metric(),
+                         index.codebook_kind(),
+                         topK,
+                         max_samples,
+                         index.centers_rot().data_handle(),
+                         index.pq_centers().data_handle(),
+                         index.data_ptrs().data_handle(),
+                         clusters_to_probe,
+                         chunk_index.data(),
+                         query,
+                         index_list_sorted,
+                         query_kths,
+                         device_lut.data(),
+                         distances_buf.data(),
+                         neighbors_ptr);
 
   // Select topk vectors for each query
   rmm::device_uvector<ScoreT> topk_dists(n_queries * topK, stream, mr);
diff --git a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
index b3c4818e70..879aafee32 100644
--- a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
+++ b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
@@ -36,6 +36,7 @@
 #include <raft/neighbors/detail/selection_faiss.cuh>
 #include <raft/spatial/knn/detail/fused_l2_knn.cuh>
 #include <raft/spatial/knn/detail/haversine_distance.cuh>
+#include <raft/spatial/knn/detail/processing.cuh>
 #include <set>
 #include <thrust/iterator/transform_iterator.h>
 
@@ -159,7 +160,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
       // calculate the top-k elements for the current tile, by calculating the
       // full pairwise distance for the tile - and then selecting the top-k from that
       // note: we're using a int32 IndexType here on purpose in order to
-      // use the pairwise_distance specializations. Since the tile size will ensure
+      // use the pairwise_distance instantiations. Since the tile size will ensure
       // that the total memory is < 1GB per tile, this will not cause any issues
       distance::pairwise_distance<ElementType, int>(handle,
                                                     search + i * d,
diff --git a/cpp/include/raft/neighbors/detail/refine.cuh b/cpp/include/raft/neighbors/detail/refine.cuh
index aedfc42698..0ff5e4cdbc 100644
--- a/cpp/include/raft/neighbors/detail/refine.cuh
+++ b/cpp/include/raft/neighbors/detail/refine.cuh
@@ -20,7 +20,9 @@
 #include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
 #include <raft/core/nvtx.hpp>
+#include <raft/matrix/detail/select_warpsort.cuh>
 #include <raft/neighbors/detail/ivf_flat_build.cuh>
+#include <raft/neighbors/detail/ivf_flat_interleaved_scan.cuh>
 #include <raft/neighbors/detail/ivf_flat_search.cuh>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
@@ -116,15 +118,6 @@ void refine_device(raft::device_resources const& handle,
                                                            neighbor_candidates.data_handle(),
                                                            n_queries,
                                                            n_candidates);
-
-  // greppable-id-specializations-ivf-flat-search: The ivfflat_interleaved_scan
-  // function is used in both raft::neighbors::ivf_flat::search and
-  // raft::neighbors::detail::refine_device. To prevent a duplicate
-  // instantiation of this function (which defines ~270 kernels) in the refine
-  // specializations, an extern template definition is provided. Please check
-  // and adjust the extern template definition and the instantiation when the
-  // below function call is edited. Search for
-  // `greppable-id-specializations-ivf-flat-search` to find them.
   uint32_t grid_dim_x = 1;
   raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan<
     data_t,
diff --git a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh
new file mode 100644
index 0000000000..8636ee9596
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstddef>                                            // size_t
+#include <cstdint>                                            // uint32_t
+#include <raft/neighbors/detail/selection_faiss_helpers.cuh>  // kFaissMaxK
+#include <raft/util/raft_explicit.hpp>                        // RAFT_EXPLICIT
+
+#if defined(RAFT_EXPLICIT_INSTANTIATE_ONLY)
+
+namespace raft::neighbors::detail {
+
+template <typename payload_t = int, typename key_t = float>
+void select_k(const key_t* inK,
+              const payload_t* inV,
+              size_t n_rows,
+              size_t n_cols,
+              key_t* outK,
+              payload_t* outV,
+              bool select_min,
+              int k,
+              cudaStream_t stream) RAFT_EXPLICIT;
+};      // namespace raft::neighbors::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)           \
+  extern template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                         const payload_t* inV, \
+                                                         size_t n_rows,        \
+                                                         size_t n_cols,        \
+                                                         key_t* outK,          \
+                                                         payload_t* outV,      \
+                                                         bool select_min,      \
+                                                         int k,                \
+                                                         cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(uint32_t, float);
+instantiate_raft_neighbors_detail_select_k(int32_t, float);
+instantiate_raft_neighbors_detail_select_k(long, float);
+instantiate_raft_neighbors_detail_select_k(size_t, double);
+// test/neighbors/selection.cu
+instantiate_raft_neighbors_detail_select_k(int, double);
+instantiate_raft_neighbors_detail_select_k(size_t, float);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/include/raft/neighbors/detail/selection_faiss-inl.cuh b/cpp/include/raft/neighbors/detail/selection_faiss-inl.cuh
new file mode 100644
index 0000000000..d2e3206993
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/selection_faiss-inl.cuh
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/util/cudart_utils.hpp>
+#include <raft/util/pow2_utils.cuh>
+
+#include <raft/neighbors/detail/faiss_select/Select.cuh>
+#include <raft/neighbors/detail/selection_faiss_helpers.cuh>  // kFaissMaxK
+
+namespace raft::neighbors::detail {
+
+template <typename payload_t, typename key_t, bool select_min, int warp_q, int thread_q, int tpb>
+__global__ void select_k_kernel(const key_t* inK,
+                                const payload_t* inV,
+                                size_t n_rows,
+                                size_t n_cols,
+                                key_t* outK,
+                                payload_t* outV,
+                                key_t initK,
+                                payload_t initV,
+                                int k)
+{
+  using align_warp        = Pow2<WarpSize>;
+  constexpr int kNumWarps = align_warp::div(tpb);
+  // Shared scratch handed to BlockSelect: kNumWarps * warp_q slots for keys and for payloads.
+  __shared__ key_t smemK[kNumWarps * warp_q];
+  __shared__ payload_t smemV[kNumWarps * warp_q];
+  // NB: select_k_impl instantiates this kernel with `false` when its runtime select_min is true.
+  faiss_select::BlockSelect<key_t,
+                            payload_t,
+                            select_min,
+                            faiss_select::Comparator<key_t>,
+                            warp_q,
+                            thread_q,
+                            tpb>
+    heap(initK, initV, smemK, smemV, k);
+  // Grid is exactly sized to rows available. The row index is 64-bit so that the output
+  // offset `row * k + i` below cannot overflow int when n_rows * k exceeds INT_MAX.
+  const size_t row = blockIdx.x;
+  {
+    size_t i = size_t(threadIdx.x);
+    // Without inV, the column index i serves as the payload.
+    inK += row * n_cols;
+    if (inV != nullptr) { inV += row * n_cols; }
+
+    // Whole warps must participate in the selection
+    size_t limit = align_warp::roundDown(n_cols);
+
+    for (; i < limit; i += tpb) {
+      heap.add(inK[i], (inV != nullptr) ? inV[i] : payload_t(i));
+    }
+
+    // Handle last remainder fraction of a warp of elements
+    if (i < n_cols) { heap.addThreadQ(inK[i], (inV != nullptr) ? inV[i] : payload_t(i)); }
+  }
+
+  heap.reduce();
+  // Copy the first k entries of the shared buffers into this row of the output.
+  for (int i = threadIdx.x; i < k; i += tpb) {
+    outK[row * k + i] = smemK[i];
+    outV[row * k + i] = smemV[i];
+  }
+}
+
+template <typename payload_t = int, typename key_t = float, int warp_q, int thread_q>
+inline void select_k_impl(const key_t* inK,
+                          const payload_t* inV,
+                          size_t n_rows,
+                          size_t n_cols,
+                          key_t* outK,
+                          payload_t* outV,
+                          bool select_min,
+                          int k,
+                          cudaStream_t stream)
+{
+  // Launch geometry: one block per row; the block size depends on the warp queue length.
+  constexpr int n_threads = (warp_q <= 1024) ? 128 : 64;
+  const dim3 blocks(n_rows);
+  const dim3 threads(n_threads);
+  // Initial key/payload values. NB: the kernel's bool argument is the inverse of select_min.
+  auto vInit = -1;
+  auto kInit = select_min ? upper_bound<key_t>() : lower_bound<key_t>();
+  if (!select_min) {
+    select_k_kernel<payload_t, key_t, true, warp_q, thread_q, n_threads>
+      <<<blocks, threads, 0, stream>>>(inK, inV, n_rows, n_cols, outK, outV, kInit, vInit, k);
+  } else {
+    select_k_kernel<payload_t, key_t, false, warp_q, thread_q, n_threads>
+      <<<blocks, threads, 0, stream>>>(inK, inV, n_rows, n_cols, outK, outV, kInit, vInit, k);
+  }
+  RAFT_CUDA_TRY(cudaGetLastError());
+}
+
+/**
+ * @brief Per-row top-k selection over dense key/payload matrices
+ * (e.g. knn distances and indices).
+ *
+ * @param[in] inK input keys (distances), n_rows x n_cols
+ * @param[in] inV input payloads (indices), n_rows x n_cols
+ * @param[in] n_rows number of rows in the input matrices
+ * @param[in] n_cols number of columns in the input matrices
+ * @param[out] outK selected keys, k per row
+ * @param[out] outV selected payloads, k per row
+ * @param[in] select_min true to keep the smallest keys, false to keep the largest
+ * @param[in] k number of entries selected per row; at most kFaissMaxK<payload_t, key_t>()
+ * @param[in] stream CUDA stream to use
+ */
+template <typename payload_t = int, typename key_t = float>
+inline void select_k(const key_t* inK,
+                     const payload_t* inV,
+                     size_t n_rows,
+                     size_t n_cols,
+                     key_t* outK,
+                     payload_t* outV,
+                     bool select_min,
+                     int k,
+                     cudaStream_t stream)
+{
+  constexpr int max_k = kFaissMaxK<payload_t, key_t>();
+  if (k == 1) {
+    select_k_impl<payload_t, key_t, 1, 1>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else if (k <= 32) {
+    select_k_impl<payload_t, key_t, 32, 2>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else if (k <= 64) {
+    select_k_impl<payload_t, key_t, 64, 3>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else if (k <= 128) {
+    select_k_impl<payload_t, key_t, 128, 3>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else if (k <= 256) {
+    select_k_impl<payload_t, key_t, 256, 4>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else if (k <= 512) {
+    select_k_impl<payload_t, key_t, 512, 8>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else if (k <= 1024 && k <= max_k) {
+    // note: constexpr std::min avoids instantiating templates for parameters we don't support
+    select_k_impl<payload_t, key_t, std::min(1024, max_k), 8>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else if (k <= 2048 && k <= max_k) {
+    select_k_impl<payload_t, key_t, std::min(2048, max_k), 8>(
+      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
+  } else {
+    ASSERT(k <= max_k, "Current max k is %d (requested %d)", max_k, k);
+  }
+}
+};  // namespace raft::neighbors::detail
diff --git a/cpp/include/raft/neighbors/detail/selection_faiss.cuh b/cpp/include/raft/neighbors/detail/selection_faiss.cuh
index 5df42e94b9..dd229b37e8 100644
--- a/cpp/include/raft/neighbors/detail/selection_faiss.cuh
+++ b/cpp/include/raft/neighbors/detail/selection_faiss.cuh
@@ -13,157 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/util/cudart_utils.hpp>
-#include <raft/util/pow2_utils.cuh>
-
-#include <raft/neighbors/detail/faiss_select/Select.cuh>
-
-namespace raft::neighbors::detail {
-
-template <typename payload_t, typename key_t>
-constexpr int kFaissMaxK()
-{
-  if (sizeof(key_t) >= 8) { return sizeof(payload_t) >= 8 ? 512 : 1024; }
-  return 2048;
-}
-
-template <typename payload_t, typename key_t, bool select_min, int warp_q, int thread_q, int tpb>
-__global__ void select_k_kernel(const key_t* inK,
-                                const payload_t* inV,
-                                size_t n_rows,
-                                size_t n_cols,
-                                key_t* outK,
-                                payload_t* outV,
-                                key_t initK,
-                                payload_t initV,
-                                int k)
-{
-  using align_warp        = Pow2<WarpSize>;
-  constexpr int kNumWarps = align_warp::div(tpb);
-
-  __shared__ key_t smemK[kNumWarps * warp_q];
-  __shared__ payload_t smemV[kNumWarps * warp_q];
-
-  faiss_select::BlockSelect<key_t,
-                            payload_t,
-                            select_min,
-                            faiss_select::Comparator<key_t>,
-                            warp_q,
-                            thread_q,
-                            tpb>
-    heap(initK, initV, smemK, smemV, k);
-
-  // Grid is exactly sized to rows available
-  int row = blockIdx.x;
-  {
-    size_t i = size_t(threadIdx.x);
-
-    inK += row * n_cols;
-    if (inV != nullptr) { inV += row * n_cols; }
-
-    // Whole warps must participate in the selection
-    size_t limit = align_warp::roundDown(n_cols);
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "selection_faiss-inl.cuh"
+#endif
 
-    for (; i < limit; i += tpb) {
-      heap.add(inK[i], (inV != nullptr) ? inV[i] : payload_t(i));
-    }
-
-    // Handle last remainder fraction of a warp of elements
-    if (i < n_cols) { heap.addThreadQ(inK[i], (inV != nullptr) ? inV[i] : payload_t(i)); }
-  }
-
-  heap.reduce();
-
-  for (int i = threadIdx.x; i < k; i += tpb) {
-    outK[row * k + i] = smemK[i];
-    outV[row * k + i] = smemV[i];
-  }
-}
-
-template <typename payload_t = int, typename key_t = float, int warp_q, int thread_q>
-inline void select_k_impl(const key_t* inK,
-                          const payload_t* inV,
-                          size_t n_rows,
-                          size_t n_cols,
-                          key_t* outK,
-                          payload_t* outV,
-                          bool select_min,
-                          int k,
-                          cudaStream_t stream)
-{
-  auto grid = dim3(n_rows);
-
-  constexpr int n_threads = (warp_q <= 1024) ? 128 : 64;
-  auto block              = dim3(n_threads);
-
-  auto kInit = select_min ? upper_bound<key_t>() : lower_bound<key_t>();
-  auto vInit = -1;
-  if (select_min) {
-    select_k_kernel<payload_t, key_t, false, warp_q, thread_q, n_threads>
-      <<<grid, block, 0, stream>>>(inK, inV, n_rows, n_cols, outK, outV, kInit, vInit, k);
-  } else {
-    select_k_kernel<payload_t, key_t, true, warp_q, thread_q, n_threads>
-      <<<grid, block, 0, stream>>>(inK, inV, n_rows, n_cols, outK, outV, kInit, vInit, k);
-  }
-  RAFT_CUDA_TRY(cudaGetLastError());
-}
-
-/**
- * @brief Select the k-nearest neighbors from dense
- * distance and index matrices.
- *
- * @param[in] inK partitioned knn distance matrix
- * @param[in] inV partitioned knn index matrix
- * @param[in] n_rows number of rows in distance and index matrices
- * @param[in] n_cols number of columns in distance and index matrices
- * @param[out] outK merged knn distance matrix
- * @param[out] outV merged knn index matrix
- * @param[in] select_min whether to select the min or the max distances
- * @param[in] k number of neighbors per partition (also number of merged neighbors)
- * @param[in] stream CUDA stream to use
- */
-template <typename payload_t = int, typename key_t = float>
-inline void select_k(const key_t* inK,
-                     const payload_t* inV,
-                     size_t n_rows,
-                     size_t n_cols,
-                     key_t* outK,
-                     payload_t* outV,
-                     bool select_min,
-                     int k,
-                     cudaStream_t stream)
-{
-  constexpr int max_k = kFaissMaxK<payload_t, key_t>();
-  if (k == 1)
-    select_k_impl<payload_t, key_t, 1, 1>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else if (k <= 32)
-    select_k_impl<payload_t, key_t, 32, 2>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else if (k <= 64)
-    select_k_impl<payload_t, key_t, 64, 3>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else if (k <= 128)
-    select_k_impl<payload_t, key_t, 128, 3>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else if (k <= 256)
-    select_k_impl<payload_t, key_t, 256, 4>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else if (k <= 512)
-    select_k_impl<payload_t, key_t, 512, 8>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else if (k <= 1024 && k <= max_k)
-    // note: have to use constexpr std::min here to avoid instantiating templates
-    // for parameters we don't support
-    select_k_impl<payload_t, key_t, std::min(1024, max_k), 8>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else if (k <= 2048 && k <= max_k)
-    select_k_impl<payload_t, key_t, std::min(2048, max_k), 8>(
-      inK, inV, n_rows, n_cols, outK, outV, select_min, k, stream);
-  else
-    ASSERT(k <= max_k, "Current max k is %d (requested %d)", max_k, k);
-}
-};  // namespace raft::neighbors::detail
+#ifdef RAFT_COMPILED
+#include "selection_faiss-ext.cuh"
+#endif
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu b/cpp/include/raft/neighbors/detail/selection_faiss_helpers.cuh
similarity index 54%
rename from cpp/src/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu
rename to cpp/include/raft/neighbors/detail/selection_faiss_helpers.cuh
index f543369de5..c4b69f21ec 100644
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu
+++ b/cpp/include/raft/neighbors/detail/selection_faiss_helpers.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,18 @@
  * limitations under the License.
  */
 
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
+#pragma once
 
-#include <cuda_fp16.h>
+namespace raft::neighbors::detail {
 
-namespace raft::neighbors::ivf_pq::detail {
+// This function is used in cpp/test/neighbors/select.cu. We want to make it
+// available through both the selection_faiss-inl.cuh and
+// selection_faiss-ext.cuh headers.
+template <typename payload_t, typename key_t>
+constexpr int kFaissMaxK()
+{
+  if (sizeof(key_t) < 8) { return 2048; }       // keys narrower than 8 bytes
+  return (sizeof(payload_t) < 8) ? 1024 : 512;  // 8-byte keys: limit depends on payload width
+}
 
-template auto get_compute_similarity_kernel<float, float, false, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, float>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
+}  // namespace raft::neighbors::detail
diff --git a/cpp/include/raft/neighbors/ivf_flat-ext.cuh b/cpp/include/raft/neighbors/ivf_flat-ext.cuh
new file mode 100644
index 0000000000..2dfe8dcc78
--- /dev/null
+++ b/cpp/include/raft/neighbors/ivf_flat-ext.cuh
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>                                // int64_t
+
+#include <raft/core/device_mdspan.hpp>            // raft::device_matrix_view
+#include <raft/core/device_resources.hpp>         // raft::device_resources
+#include <raft/neighbors/ivf_flat_serialize.cuh>
+#include <raft/neighbors/ivf_flat_types.hpp>      // raft::neighbors::ivf_flat::index
+#include <raft/util/raft_explicit.hpp>            // RAFT_EXPLICIT
+#include <rmm/mr/device/per_device_resource.hpp>  // rmm::mr::device_memory_resource
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::neighbors::ivf_flat {
+// Declarations only; see ivf_flat-inl.cuh for the documented definitions.
+template <typename T, typename IdxT>
+auto build(raft::device_resources const& handle,
+           const index_params& params,
+           const T* dataset,
+           IdxT n_rows,
+           uint32_t dim) -> index<T, IdxT> RAFT_EXPLICIT;
+// build overload taking the dataset as a row-major device_matrix_view; returns the index.
+template <typename T, typename IdxT>
+auto build(raft::device_resources const& handle,
+           const index_params& params,
+           raft::device_matrix_view<const T, IdxT, row_major> dataset)
+  -> index<T, IdxT> RAFT_EXPLICIT;
+// build overload that stores the result in `idx` instead of returning it.
+template <typename T, typename IdxT>
+void build(raft::device_resources const& handle,
+           const index_params& params,
+           raft::device_matrix_view<const T, IdxT, row_major> dataset,
+           raft::neighbors::ivf_flat::index<T, IdxT>& idx) RAFT_EXPLICIT;
+// extend overload returning a new index: orig_index plus the new vectors (raw pointers).
+template <typename T, typename IdxT>
+auto extend(raft::device_resources const& handle,
+            const index<T, IdxT>& orig_index,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows) -> index<T, IdxT> RAFT_EXPLICIT;
+// Same as above with mdspan views; new_indices is optional.
+template <typename T, typename IdxT>
+auto extend(raft::device_resources const& handle,
+            raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+            std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
+            const index<T, IdxT>& orig_index) -> index<T, IdxT> RAFT_EXPLICIT;
+// extend overload taking `index` by pointer; presumably updated in place (TODO confirm).
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            index<T, IdxT>* index,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows) RAFT_EXPLICIT;
+// Same as above with mdspan views.
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+            std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
+            index<T, IdxT>* index) RAFT_EXPLICIT;
+// search with raw device pointers; `mr` defaults to nullptr.
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const index<T, IdxT>& index,
+            const T* queries,
+            uint32_t n_queries,
+            uint32_t k,
+            IdxT* neighbors,
+            float* distances,
+            rmm::mr::device_memory_resource* mr = nullptr) RAFT_EXPLICIT;
+// search with mdspan views for queries, neighbors and distances.
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const index<T, IdxT>& index,
+            raft::device_matrix_view<const T, IdxT, row_major> queries,
+            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+            raft::device_matrix_view<float, IdxT, row_major> distances) RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors::ivf_flat
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)        \
+  extern template auto raft::neighbors::ivf_flat::build<T, IdxT>( \
+    raft::device_resources const& handle,                         \
+    const raft::neighbors::ivf_flat::index_params& params,        \
+    const T* dataset,                                             \
+    IdxT n_rows,                                                  \
+    uint32_t dim)                                                 \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \
+                                                                  \
+  extern template auto raft::neighbors::ivf_flat::build<T, IdxT>( \
+    raft::device_resources const& handle,                         \
+    const raft::neighbors::ivf_flat::index_params& params,        \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset)   \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \
+                                                                  \
+  extern template void raft::neighbors::ivf_flat::build<T, IdxT>( \
+    raft::device_resources const& handle,                         \
+    const raft::neighbors::ivf_flat::index_params& params,        \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset,   \
+    raft::neighbors::ivf_flat::index<T, IdxT>& idx)
+// Explicit instantiation declarations; the `;` at each use terminates the last declaration.
+instantiate_raft_neighbors_ivf_flat_build(float, int64_t);
+instantiate_raft_neighbors_ivf_flat_build(int8_t, int64_t);
+instantiate_raft_neighbors_ivf_flat_build(uint8_t, int64_t);
+#undef instantiate_raft_neighbors_ivf_flat_build
+
+#define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
+  extern template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \
+    raft::device_resources const& handle,                                  \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows)                                                           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  extern template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  extern template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \
+    raft::device_resources const& handle,                                  \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows);                                                          \
+                                                                           \
+  extern template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index)
+// Explicit instantiation declarations; the `;` at each use terminates the last declaration.
+instantiate_raft_neighbors_ivf_flat_extend(float, int64_t);
+instantiate_raft_neighbors_ivf_flat_extend(int8_t, int64_t);
+instantiate_raft_neighbors_ivf_flat_extend(uint8_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_flat_extend
+
+#define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)        \
+  extern template void raft::neighbors::ivf_flat::search<T, IdxT>( \
+    raft::device_resources const& handle,                          \
+    const raft::neighbors::ivf_flat::search_params& params,        \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \
+    const T* queries,                                              \
+    uint32_t n_queries,                                            \
+    uint32_t k,                                                    \
+    IdxT* neighbors,                                               \
+    float* distances,                                              \
+    rmm::mr::device_memory_resource* mr);                          \
+                                                                   \
+  extern template void raft::neighbors::ivf_flat::search<T, IdxT>( \
+    raft::device_resources const& handle,                          \
+    const raft::neighbors::ivf_flat::search_params& params,        \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \
+    raft::device_matrix_view<const T, IdxT, row_major> queries,    \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,     \
+    raft::device_matrix_view<float, IdxT, row_major> distances)
+// Explicit instantiation declarations; the `;` at each use terminates the last declaration.
+instantiate_raft_neighbors_ivf_flat_search(float, int64_t);
+instantiate_raft_neighbors_ivf_flat_search(int8_t, int64_t);
+instantiate_raft_neighbors_ivf_flat_search(uint8_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_flat_search
diff --git a/cpp/include/raft/neighbors/ivf_flat-inl.cuh b/cpp/include/raft/neighbors/ivf_flat-inl.cuh
new file mode 100644
index 0000000000..4f8d7f596e
--- /dev/null
+++ b/cpp/include/raft/neighbors/ivf_flat-inl.cuh
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/neighbors/detail/ivf_flat_build.cuh>
+#include <raft/neighbors/detail/ivf_flat_search.cuh>
+#include <raft/neighbors/ivf_flat_serialize.cuh>
+#include <raft/neighbors/ivf_flat_types.hpp>
+
+#include <raft/core/device_resources.hpp>
+
+#include <raft/core/device_mdspan.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+namespace raft::neighbors::ivf_flat {
+
+/**
+ * @brief Build the index from the dataset for efficient search.
+ *
+ * NB: Currently, the following distance metrics are supported:
+ * - L2Expanded
+ * - L2Unexpanded
+ * - InnerProduct
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   // use default index parameters
+ *   ivf_flat::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = ivf_flat::build(handle, index_params, dataset, N, D);
+ *   // use default search parameters
+ *   ivf_flat::search_params search_params;
+ *   // search K nearest neighbours for each of the N queries
+ *   ivf_flat::search(handle, search_params, index, queries, N, K, out_inds, out_dists);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle the raft::device_resources handle, forwarded to detail::build
+ * @param[in] params configure the index building
+ * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim]
+ * @param[in] n_rows the number of samples
+ * @param[in] dim the dimensionality of the data
+ *
+ * @return the constructed ivf-flat index
+ */
+template <typename T, typename IdxT>
+auto build(raft::device_resources const& handle,
+           const index_params& params,
+           const T* dataset,
+           IdxT n_rows,
+           uint32_t dim) -> index<T, IdxT>
+{
+  return raft::neighbors::ivf_flat::detail::build(handle, params, dataset, n_rows, dim);
+}
+
+/**
+ * @defgroup ivf_flat IVF Flat Algorithm
+ * @{
+ */
+
+/**
+ * @brief Build the index from the dataset for efficient search.
+ *
+ * NB: Currently, the following distance metrics are supported:
+ * - L2Expanded
+ * - L2Unexpanded
+ * - InnerProduct
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   // use default index parameters
+ *   ivf_flat::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = ivf_flat::build(handle, index_params, dataset);
+ *   // use default search parameters
+ *   ivf_flat::search_params search_params;
+ *   // search K nearest neighbours for each of the N queries
+ *   ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle the raft::device_resources handle, forwarded to detail::build
+ * @param[in] params configure the index building
+ * @param[in] dataset raft::device_matrix_view to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed ivf-flat index
+ */
+template <typename T, typename IdxT>
+auto build(raft::device_resources const& handle,
+           const index_params& params,
+           raft::device_matrix_view<const T, IdxT, row_major> dataset) -> index<T, IdxT>
+{
+  return raft::neighbors::ivf_flat::detail::build(handle,
+                                                  params,
+                                                  dataset.data_handle(),
+                                                  static_cast<IdxT>(dataset.extent(0)),
+                                                  static_cast<IdxT>(dataset.extent(1)));
+}
+
+/**
+ * @brief Build the index from the dataset for efficient search.
+ *
+ * NB: Currently, the following distance metrics are supported:
+ * - L2Expanded
+ * - L2Unexpanded
+ * - InnerProduct
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   // use default index parameters
+ *   ivf_flat::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   ivf_flat::index<decltype(dataset)::value_type, decltype(dataset)::index_type> index;
+ *   ivf_flat::build(handle, index_params, dataset, index);
+ *   // use default search parameters
+ *   ivf_flat::search_params search_params;
+ *   // search K nearest neighbours for each of the N queries
+ *   ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle the raft::device_resources handle, forwarded to detail::build
+ * @param[in] params configure the index building
+ * @param[in] dataset raft::device_matrix_view to a row-major matrix [n_rows, dim]
+ * @param[out] idx reference to ivf_flat::index; overwritten with the newly built index
+ *
+ */
+template <typename T, typename IdxT>
+void build(raft::device_resources const& handle,
+           const index_params& params,
+           raft::device_matrix_view<const T, IdxT, row_major> dataset,
+           raft::neighbors::ivf_flat::index<T, IdxT>& idx)
+{
+  idx = raft::neighbors::ivf_flat::detail::build(handle,
+                                                 params,
+                                                 dataset.data_handle(),
+                                                 static_cast<IdxT>(dataset.extent(0)),
+                                                 static_cast<IdxT>(dataset.extent(1)));
+}
+
+/** @} */
+
+/**
+ * @brief Build a new index containing the data of the original plus new extra vectors.
+ *
+ * Implementation note:
+ *    The new data is clustered according to existing kmeans clusters, then the cluster
+ *    centers are adjusted to match the newly labeled data.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   ivf_flat::index_params index_params;
+ *   index_params.add_data_on_build = false;      // don't populate index on build
+ *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
+ *   // train the index from a [N, D] dataset
+ *   auto index_empty = ivf_flat::build(handle, index_params, dataset, N, D);
+ *   // fill the index with the data
+ *   auto index = ivf_flat::extend(handle, index_empty, dataset, nullptr, N);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle the raft::device_resources handle, forwarded to detail::extend
+ * @param[in] orig_index original index
+ * @param[in] new_vectors a device pointer to a row-major matrix [n_rows, index.dim()]
+ * @param[in] new_indices a device pointer to a vector of indices [n_rows].
+ *    If the original index is empty (`orig_index.size() == 0`), you can pass `nullptr`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[in] n_rows number of rows in `new_vectors`
+ *
+ * @return the constructed extended ivf-flat index
+ */
+template <typename T, typename IdxT>
+auto extend(raft::device_resources const& handle,
+            const index<T, IdxT>& orig_index,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows) -> index<T, IdxT>
+{
+  return raft::neighbors::ivf_flat::detail::extend(
+    handle, orig_index, new_vectors, new_indices, n_rows);
+}
+
+/**
+ * @ingroup ivf_flat
+ * @{
+ */
+
+/**
+ * @brief Build a new index containing the data of the original plus new extra vectors.
+ *
+ * Implementation note:
+ *    The new data is clustered according to existing kmeans clusters, then the cluster
+ *    centers are adjusted to match the newly labeled data.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   ivf_flat::index_params index_params;
+ *   index_params.add_data_on_build = false;      // don't populate index on build
+ *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
+ *   // train the index from a [N, D] dataset
+ *   auto index_empty = ivf_flat::build(handle, index_params, dataset);
+ *   // fill the index with the data
+ *   std::optional<raft::device_vector_view<const IdxT, IdxT>> no_op = std::nullopt;
+ *   auto index = ivf_flat::extend(handle, dataset, no_op, index_empty);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle raft device resources, forwarded to the pointer-based `extend` overload
+ * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, orig_index.dim()]
+ * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows].
+ *    If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[in] orig_index original index
+ *
+ * @return the constructed extended ivf-flat index
+ */
+template <typename T, typename IdxT>
+auto extend(raft::device_resources const& handle,
+            raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+            std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
+            const index<T, IdxT>& orig_index) -> index<T, IdxT>
+{
+  return extend<T, IdxT>(handle,
+                         orig_index,
+                         new_vectors.data_handle(),
+                         new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+                         new_vectors.extent(0));
+}
+
+/** @} */
+
+/**
+ * @brief Extend the index in-place with the new data.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   ivf_flat::index_params index_params;
+ *   index_params.add_data_on_build = false;      // don't populate index on build
+ *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
+ *   // train the index from a [N, D] dataset
+ *   auto index_empty = ivf_flat::build(handle, index_params, dataset, N, D);
+ *   // fill the index with the data (pass a pointer to select the in-place overload)
+ *   ivf_flat::extend(handle, &index_empty, dataset, nullptr, N);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle raft device resources, forwarded to the detail implementation
+ * @param[inout] index pointer to index, to be overwritten in-place
+ * @param[in] new_vectors a device pointer to a row-major matrix [n_rows, index->dim()]
+ * @param[in] new_indices a device pointer to a vector of indices [n_rows].
+ *    If the index is empty (`index->size() == 0`), you can pass `nullptr`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[in] n_rows the number of samples
+ */
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            index<T, IdxT>* index,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows)
+{
+  raft::neighbors::ivf_flat::detail::extend(handle, index, new_vectors, new_indices, n_rows);
+}
+
+/**
+ * @ingroup ivf_flat
+ * @{
+ */
+
+/**
+ * @brief Extend the index in-place with the new data.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   ivf_flat::index_params index_params;
+ *   index_params.add_data_on_build = false;      // don't populate index on build
+ *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
+ *   // train the index from a [N, D] dataset
+ *   auto index_empty = ivf_flat::build(handle, index_params, dataset);
+ *   // fill the index with the data
+ *   std::optional<raft::device_vector_view<const IdxT, IdxT>> no_op = std::nullopt;
+ *   ivf_flat::extend(handle, dataset, no_op, &index_empty);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle raft device resources, forwarded to the pointer-based `extend` overload
+ * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, index->dim()]
+ * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows].
+ *    If the index is empty (`index->size() == 0`), you can pass `std::nullopt`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[inout] index pointer to index, to be overwritten in-place
+ */
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+            std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
+            index<T, IdxT>* index)
+{
+  extend(handle,
+         index,
+         new_vectors.data_handle(),
+         new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+         static_cast<IdxT>(new_vectors.extent(0)));
+}
+
+/** @} */
+
+/**
+ * @brief Search ANN using the constructed index.
+ *
+ * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example.
+ *
+ * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
+ * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
+ * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
+ * eliminate entirely allocations happening within `search`:
+ * @code{.cpp}
+ *   ...
+ *   // Create a pooling memory resource with a pre-defined initial size.
+ *   rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> mr(
+ *     rmm::mr::get_current_device_resource(), 1024 * 1024);
+ *   // use default search parameters
+ *   ivf_flat::search_params search_params;
+ *   // Use the same allocator across multiple searches to reduce the number of
+ *   // cuda memory allocations
+ *   ivf_flat::search(handle, search_params, index, queries1, N1, K, out_inds1, out_dists1, &mr);
+ *   ivf_flat::search(handle, search_params, index, queries2, N2, K, out_inds2, out_dists2, &mr);
+ *   ivf_flat::search(handle, search_params, index, queries3, N3, K, out_inds3, out_dists3, &mr);
+ *   ...
+ * @endcode
+ * The exact size of the temporary buffer depends on multiple factors and is an implementation
+ * detail. However, you can safely specify a small initial size for the memory pool, so that only a
+ * few allocations happen to grow it during the first invocations of the `search`.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices
+ *
+ * @param[in] handle raft device resources, forwarded to the detail implementation
+ * @param[in] params configure the search
+ * @param[in] index ivf-flat constructed index
+ * @param[in] queries a device pointer to a row-major matrix [n_queries, index.dim()]
+ * @param[in] n_queries the batch size
+ * @param[in] k the number of neighbors to find for each query.
+ * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
+ * [n_queries, k]
+ * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
+ * @param[in] mr an optional memory resource to use across the searches (you can provide a large
+ * enough memory pool here to avoid memory allocations within search).
+ */
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const index<T, IdxT>& index,
+            const T* queries,
+            uint32_t n_queries,
+            uint32_t k,
+            IdxT* neighbors,
+            float* distances,
+            rmm::mr::device_memory_resource* mr = nullptr)
+{
+  return raft::neighbors::ivf_flat::detail::search(
+    handle, params, index, queries, n_queries, k, neighbors, distances, mr);
+}
+
+/**
+ * @ingroup ivf_flat
+ * @{
+ */
+
+/**
+ * @brief Search ANN using the constructed index.
+ *
+ * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example.
+ *
+ * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
+ * calls, which may lead to undesirable allocations and slowdown. This overload has no memory
+ * resource parameter and forwards `nullptr` as `mr`; to supply a pool memory resource, use the
+ * pointer-based `search` overload, which accepts an `mr` argument. Example for this overload:
+ * @code{.cpp}
+ *   ...
+ *   // use default search parameters
+ *   ivf_flat::search_params search_params;
+ *   // Run several searches; the number of neighbors `k` is taken from the
+ *   // number of columns of the output matrices
+ *   ivf_flat::search(handle, search_params, index, queries1, out_inds1, out_dists1);
+ *   ivf_flat::search(handle, search_params, index, queries2, out_inds2, out_dists2);
+ *   ivf_flat::search(handle, search_params, index, queries3, out_inds3, out_dists3);
+ *   ...
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices
+ *
+ * @param[in] handle raft device resources, forwarded to the pointer-based `search` overload
+ * @param[in] params configure the search
+ * @param[in] index ivf-flat constructed index
+ * @param[in] queries a device matrix view of a row-major matrix [n_queries, index.dim()]
+ * @param[out] neighbors a device matrix view of the indices of the neighbors in the source dataset
+ * [n_queries, k]; `k` is taken from `neighbors.extent(1)`
+ * @param[out] distances device matrix view of distances to the selected neighbors [n_queries, k]
+ */
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const index<T, IdxT>& index,
+            raft::device_matrix_view<const T, IdxT, row_major> queries,
+            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+            raft::device_matrix_view<float, IdxT, row_major> distances)
+{
+  RAFT_EXPECTS(
+    queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0),
+    "Number of rows in output neighbors and distances matrices must equal the number of queries.");
+
+  RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1),
+               "Number of columns in output neighbors and distances matrices must be equal");
+
+  RAFT_EXPECTS(queries.extent(1) == index.dim(),
+               "Number of query dimensions should equal number of dimensions in the index.");
+
+  return search(handle,
+                params,
+                index,
+                queries.data_handle(),
+                static_cast<std::uint32_t>(queries.extent(0)),
+                static_cast<std::uint32_t>(neighbors.extent(1)),
+                neighbors.data_handle(),
+                distances.data_handle(),
+                nullptr);
+}
+
+/** @} */
+
+}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/include/raft/neighbors/ivf_flat.cuh b/cpp/include/raft/neighbors/ivf_flat.cuh
index f12062f851..8fd9628a41 100644
--- a/cpp/include/raft/neighbors/ivf_flat.cuh
+++ b/cpp/include/raft/neighbors/ivf_flat.cuh
@@ -13,459 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/detail/ivf_flat_build.cuh>
-#include <raft/neighbors/detail/ivf_flat_search.cuh>
-#include <raft/neighbors/ivf_flat_serialize.cuh>
-#include <raft/neighbors/ivf_flat_types.hpp>
-
-#include <raft/core/device_resources.hpp>
-
-#include <raft/core/device_mdspan.hpp>
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/per_device_resource.hpp>
-
-namespace raft::neighbors::ivf_flat {
-
-/**
- * @brief Build the index from the dataset for efficient search.
- *
- * NB: Currently, the following distance metrics are supported:
- * - L2Expanded
- * - L2Unexpanded
- * - InnerProduct
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   // use default index parameters
- *   ivf_flat::index_params index_params;
- *   // create and fill the index from a [N, D] dataset
- *   auto index = ivf_flat::build(handle, index_params, dataset, N, D);
- *   // use default search parameters
- *   ivf_flat::search_params search_params;
- *   // search K nearest neighbours for each of the N queries
- *   ivf_flat::search(handle, search_params, index, queries, N, K, out_inds, out_dists);
- * @endcode
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] params configure the index building
- * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim]
- * @param[in] n_rows the number of samples
- * @param[in] dim the dimensionality of the data
- *
- * @return the constructed ivf-flat index
- */
-template <typename T, typename IdxT>
-auto build(raft::device_resources const& handle,
-           const index_params& params,
-           const T* dataset,
-           IdxT n_rows,
-           uint32_t dim) -> index<T, IdxT>
-{
-  return raft::neighbors::ivf_flat::detail::build(handle, params, dataset, n_rows, dim);
-}
-
-/**
- * @defgroup ivf_flat IVF Flat Algorithm
- * @{
- */
-
-/**
- * @brief Build the index from the dataset for efficient search.
- *
- * NB: Currently, the following distance metrics are supported:
- * - L2Expanded
- * - L2Unexpanded
- * - InnerProduct
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   // use default index parameters
- *   ivf_flat::index_params index_params;
- *   // create and fill the index from a [N, D] dataset
- *   auto index = ivf_flat::build(handle, dataset, index_params);
- *   // use default search parameters
- *   ivf_flat::search_params search_params;
- *   // search K nearest neighbours for each of the N queries
- *   ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
- * @endcode
- *
- * @tparam value_t data element type
- * @tparam idx_t type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] params configure the index building
- * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim]
- *
- * @return the constructed ivf-flat index
- */
-template <typename value_t, typename idx_t>
-auto build(raft::device_resources const& handle,
-           const index_params& params,
-           raft::device_matrix_view<const value_t, idx_t, row_major> dataset)
-  -> index<value_t, idx_t>
-{
-  return raft::neighbors::ivf_flat::detail::build(handle,
-                                                  params,
-                                                  dataset.data_handle(),
-                                                  static_cast<idx_t>(dataset.extent(0)),
-                                                  static_cast<idx_t>(dataset.extent(1)));
-}
-
-/**
- * @brief Build the index from the dataset for efficient search.
- *
- * NB: Currently, the following distance metrics are supported:
- * - L2Expanded
- * - L2Unexpanded
- * - InnerProduct
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   // use default index parameters
- *   ivf_flat::index_params index_params;
- *   // create and fill the index from a [N, D] dataset
- *   ivf_flat::index<decltype(dataset::value_type), decltype(dataset::index_type)> index;
- *   ivf_flat::build(handle, dataset, index_params, index);
- *   // use default search parameters
- *   ivf_flat::search_params search_params;
- *   // search K nearest neighbours for each of the N queries
- *   ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
- * @endcode
- *
- * @tparam value_t data element type
- * @tparam idx_t type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] params configure the index building
- * @param[in] dataset raft::device_matrix_view to a row-major matrix [n_rows, dim]
- * @param[out] idx reference to ivf_flat::index
- *
- */
-template <typename value_t, typename idx_t>
-void build(raft::device_resources const& handle,
-           const index_params& params,
-           raft::device_matrix_view<const value_t, idx_t, row_major> dataset,
-           raft::neighbors::ivf_flat::index<value_t, idx_t>& idx)
-{
-  idx = raft::neighbors::ivf_flat::detail::build(handle,
-                                                 params,
-                                                 dataset.data_handle(),
-                                                 static_cast<idx_t>(dataset.extent(0)),
-                                                 static_cast<idx_t>(dataset.extent(1)));
-}
-
-/** @} */
-
-/**
- * @brief Build a new index containing the data of the original plus new extra vectors.
- *
- * Implementation note:
- *    The new data is clustered according to existing kmeans clusters, then the cluster
- *    centers are adjusted to match the newly labeled data.
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   ivf_flat::index_params index_params;
- *   index_params.add_data_on_build = false;      // don't populate index on build
- *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
- *   // train the index from a [N, D] dataset
- *   auto index_empty = ivf_flat::build(handle, index_params, dataset, N, D);
- *   // fill the index with the data
- *   auto index = ivf_flat::extend(handle, index_empty, dataset, nullptr, N);
- * @endcode
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] orig_index original index
- * @param[in] new_vectors a device pointer to a row-major matrix [n_rows, index.dim()]
- * @param[in] new_indices a device pointer to a vector of indices [n_rows].
- *    If the original index is empty (`orig_index.size() == 0`), you can pass `nullptr`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[in] n_rows number of rows in `new_vectors`
- *
- * @return the constructed extended ivf-flat index
- */
-template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
-            const index<T, IdxT>& orig_index,
-            const T* new_vectors,
-            const IdxT* new_indices,
-            IdxT n_rows) -> index<T, IdxT>
-{
-  return raft::neighbors::ivf_flat::detail::extend(
-    handle, orig_index, new_vectors, new_indices, n_rows);
-}
-
-/**
- * @ingroup ivf_flat
- * @{
- */
-
-/**
- * @brief Build a new index containing the data of the original plus new extra vectors.
- *
- * Implementation note:
- *    The new data is clustered according to existing kmeans clusters, then the cluster
- *    centers are adjusted to match the newly labeled data.
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   ivf_flat::index_params index_params;
- *   index_params.add_data_on_build = false;      // don't populate index on build
- *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
- *   // train the index from a [N, D] dataset
- *   auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset);
- *   // fill the index with the data
- *   std::optional<raft::device_vector_view<const idx_t, idx_t>> no_op = std::nullopt;
- *   auto index = ivf_flat::extend(handle, index_empty, no_op, dataset);
- * @endcode
- *
- * @tparam value_t data element type
- * @tparam idx_t type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, index.dim()]
- * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows].
- *    If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[in] orig_index original index
- *
- * @return the constructed extended ivf-flat index
- */
-template <typename value_t, typename idx_t>
-auto extend(raft::device_resources const& handle,
-            raft::device_matrix_view<const value_t, idx_t, row_major> new_vectors,
-            std::optional<raft::device_vector_view<const idx_t, idx_t>> new_indices,
-            const index<value_t, idx_t>& orig_index) -> index<value_t, idx_t>
-{
-  return extend<value_t, idx_t>(
-    handle,
-    orig_index,
-    new_vectors.data_handle(),
-    new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
-    new_vectors.extent(0));
-}
-
-/** @} */
-
-/**
- * @brief Extend the index in-place with the new data.
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   ivf_flat::index_params index_params;
- *   index_params.add_data_on_build = false;      // don't populate index on build
- *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
- *   // train the index from a [N, D] dataset
- *   auto index_empty = ivf_flat::build(handle, index_params, dataset, N, D);
- *   // fill the index with the data
- *   ivf_flat::extend(handle, index_empty, dataset, nullptr, N);
- * @endcode
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param handle
- * @param[inout] index
- * @param[in] new_vectors a device pointer to a row-major matrix [n_rows, index.dim()]
- * @param[in] new_indices a device pointer to a vector of indices [n_rows].
- *    If the original index is empty (`orig_index.size() == 0`), you can pass `nullptr`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[in] n_rows the number of samples
- */
-template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
-            index<T, IdxT>* index,
-            const T* new_vectors,
-            const IdxT* new_indices,
-            IdxT n_rows)
-{
-  raft::neighbors::ivf_flat::detail::extend(handle, index, new_vectors, new_indices, n_rows);
-}
-
-/**
- * @ingroup ivf_flat
- * @{
- */
-
-/**
- * @brief Extend the index in-place with the new data.
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   ivf_flat::index_params index_params;
- *   index_params.add_data_on_build = false;      // don't populate index on build
- *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
- *   // train the index from a [N, D] dataset
- *   auto index_empty = ivf_flat::build(handle, index_params, dataset);
- *   // fill the index with the data
- *   std::optional<raft::device_vector_view<const idx_t, idx_t>> no_op = std::nullopt;
- *   ivf_flat::extend(handle, dataset, no_opt, &index_empty);
- * @endcode
- *
- * @tparam value_t data element type
- * @tparam idx_t type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, index.dim()]
- * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows].
- *    If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[inout] index pointer to index, to be overwritten in-place
- */
-template <typename value_t, typename idx_t>
-void extend(raft::device_resources const& handle,
-            raft::device_matrix_view<const value_t, idx_t, row_major> new_vectors,
-            std::optional<raft::device_vector_view<const idx_t, idx_t>> new_indices,
-            index<value_t, idx_t>* index)
-{
-  extend(handle,
-         index,
-         new_vectors.data_handle(),
-         new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
-         static_cast<idx_t>(new_vectors.extent(0)));
-}
-
-/** @} */
-
-/**
- * @brief Search ANN using the constructed index.
- *
- * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example.
- *
- * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
- * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
- * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
- * eliminate entirely allocations happening within `search`:
- * @code{.cpp}
- *   ...
- *   // Create a pooling memory resource with a pre-defined initial size.
- *   rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> mr(
- *     rmm::mr::get_current_device_resource(), 1024 * 1024);
- *   // use default search parameters
- *   ivf_flat::search_params search_params;
- *   // Use the same allocator across multiple searches to reduce the number of
- *   // cuda memory allocations
- *   ivf_flat::search(handle, search_params, index, queries1, N1, K, out_inds1, out_dists1, &mr);
- *   ivf_flat::search(handle, search_params, index, queries2, N2, K, out_inds2, out_dists2, &mr);
- *   ivf_flat::search(handle, search_params, index, queries3, N3, K, out_inds3, out_dists3, &mr);
- *   ...
- * @endcode
- * The exact size of the temporary buffer depends on multiple factors and is an implementation
- * detail. However, you can safely specify a small initial size for the memory pool, so that only a
- * few allocations happen to grow it during the first invocations of the `search`.
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices
- *
- * @param[in] handle
- * @param[in] params configure the search
- * @param[in] index ivf-flat constructed index
- * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()]
- * @param[in] n_queries the batch size
- * @param[in] k the number of neighbors to find for each query.
- * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
- * [n_queries, k]
- * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
- * @param[in] mr an optional memory resource to use across the searches (you can provide a large
- * enough memory pool here to avoid memory allocations within search).
- */
-template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
-            const search_params& params,
-            const index<T, IdxT>& index,
-            const T* queries,
-            uint32_t n_queries,
-            uint32_t k,
-            IdxT* neighbors,
-            float* distances,
-            rmm::mr::device_memory_resource* mr = nullptr)
-{
-  return raft::neighbors::ivf_flat::detail::search(
-    handle, params, index, queries, n_queries, k, neighbors, distances, mr);
-}
-
-/**
- * @ingroup ivf_flat
- * @{
- */
-
-/**
- * @brief Search ANN using the constructed index.
- *
- * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example.
- *
- * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
- * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
- * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
- * eliminate entirely allocations happening within `search`:
- * @code{.cpp}
- *   ...
- *   // use default search parameters
- *   ivf_flat::search_params search_params;
- *   // Use the same allocator across multiple searches to reduce the number of
- *   // cuda memory allocations
- *   ivf_flat::search(handle, search_params, index, queries1, out_inds1, out_dists1);
- *   ivf_flat::search(handle, search_params, index, queries2, out_inds2, out_dists2);
- *   ivf_flat::search(handle, search_params, index, queries3, out_inds3, out_dists3);
- *   ...
- * @endcode
- *
- * @tparam value_t data element type
- * @tparam idx_t type of the indices
- *
- * @param[in] handle
- * @param[in] params configure the search
- * @param[in] index ivf-flat constructed index
- * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()]
- * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
- * [n_queries, k]
- * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
- */
-template <typename value_t, typename idx_t>
-void search(raft::device_resources const& handle,
-            const search_params& params,
-            const index<value_t, idx_t>& index,
-            raft::device_matrix_view<const value_t, idx_t, row_major> queries,
-            raft::device_matrix_view<idx_t, idx_t, row_major> neighbors,
-            raft::device_matrix_view<float, idx_t, row_major> distances)
-{
-  RAFT_EXPECTS(
-    queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0),
-    "Number of rows in output neighbors and distances matrices must equal the number of queries.");
-
-  RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1),
-               "Number of columns in output neighbors and distances matrices must be equal");
-
-  RAFT_EXPECTS(queries.extent(1) == index.dim(),
-               "Number of query dimensions should equal number of dimensions in the index.");
-
-  return search(handle,
-                params,
-                index,
-                queries.data_handle(),
-                static_cast<std::uint32_t>(queries.extent(0)),
-                static_cast<std::uint32_t>(neighbors.extent(1)),
-                neighbors.data_handle(),
-                distances.data_handle(),
-                nullptr);
-}
-
-/** @} */
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "ivf_flat-inl.cuh"
+#endif
 
-}  // namespace raft::neighbors::ivf_flat
+#ifdef RAFT_COMPILED
+#include "ivf_flat-ext.cuh"
+#endif
diff --git a/cpp/include/raft/neighbors/ivf_flat_types.hpp b/cpp/include/raft/neighbors/ivf_flat_types.hpp
index 011adcffff..c7abe83f8a 100644
--- a/cpp/include/raft/neighbors/ivf_flat_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_flat_types.hpp
@@ -27,6 +27,7 @@
 #include <raft/neighbors/ivf_list_types.hpp>
 #include <raft/util/integer_utils.hpp>
 
+#include <algorithm>  // std::max
 #include <memory>
 #include <optional>
 #include <thrust/fill.h>
@@ -379,10 +380,11 @@ struct index : ann::index {
   {
     // TODO: consider padding the dimensions and fixing veclen to its maximum possible value as a
     // template parameter (https://github.com/rapidsai/raft/issues/711)
-    uint32_t veclen = 16 / sizeof(T);
-    while (dim % veclen != 0) {
-      veclen = veclen >> 1;
-    }
+
+    // NOTE: keep this consistent with the select_interleaved_scan_kernel logic
+    // in detail/ivf_flat_interleaved_scan-inl.cuh.
+    uint32_t veclen = std::max<uint32_t>(1, 16 / sizeof(T));
+    if (dim % veclen != 0) { veclen = 1; }
     return veclen;
   }
 };
diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
new file mode 100644
index 0000000000..4b9b0673d4
--- /dev/null
+++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>                                // int64_t
+
+#include <raft/core/device_mdspan.hpp>            // raft::device_matrix_view
+#include <raft/core/device_resources.hpp>         // raft::device_resources
+#include <raft/neighbors/ivf_pq_types.hpp>        // raft::neighbors::ivf_pq::index
+#include <raft/util/raft_explicit.hpp>            // RAFT_EXPLICIT
+#include <rmm/mr/device/per_device_resource.hpp>  // rmm::mr::device_memory_resource
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::neighbors::ivf_pq {
+
+template <typename T, typename IdxT = uint32_t>
+index<IdxT> build(raft::device_resources const& handle,
+                  const index_params& params,
+                  raft::device_matrix_view<const T, IdxT, row_major> dataset) RAFT_EXPLICIT;
+
+template <typename T, typename IdxT>
+index<IdxT> extend(raft::device_resources const& handle,
+                   raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+                   std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,
+                   const index<IdxT>& idx) RAFT_EXPLICIT;
+
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+            std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,
+            index<IdxT>* idx) RAFT_EXPLICIT;
+
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const index<IdxT>& idx,
+            raft::device_matrix_view<const T, IdxT, row_major> queries,
+            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+            raft::device_matrix_view<float, IdxT, row_major> distances) RAFT_EXPLICIT;
+
+template <typename T, typename IdxT = uint32_t>
+auto build(raft::device_resources const& handle,
+           const index_params& params,
+           const T* dataset,
+           IdxT n_rows,
+           uint32_t dim) -> index<IdxT> RAFT_EXPLICIT;
+
+template <typename T, typename IdxT>
+auto extend(raft::device_resources const& handle,
+            const index<IdxT>& idx,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows) -> index<IdxT> RAFT_EXPLICIT;
+
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            index<IdxT>* idx,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows) RAFT_EXPLICIT;
+
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const raft::neighbors::ivf_pq::search_params& params,
+            const index<IdxT>& idx,
+            const T* queries,
+            uint32_t n_queries,
+            uint32_t k,
+            IdxT* neighbors,
+            float* distances,
+            rmm::mr::device_memory_resource* mr = nullptr) RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors::ivf_pq
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                        \
+  extern template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
+    raft::device_resources const& handle,                                                       \
+    const raft::neighbors::ivf_pq::index_params& params,                                        \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset);                                \
+                                                                                                \
+  extern template auto raft::neighbors::ivf_pq::build(                                          \
+    raft::device_resources const& handle,                                                       \
+    const raft::neighbors::ivf_pq::index_params& params,                                        \
+    const T* dataset,                                                                           \
+    IdxT n_rows,                                                                                \
+    uint32_t dim)                                                                               \
+    ->raft::neighbors::ivf_pq::index<IdxT>;
+
+instantiate_raft_neighbors_ivf_pq_build(float, int64_t);
+instantiate_raft_neighbors_ivf_pq_build(int8_t, int64_t);
+instantiate_raft_neighbors_ivf_pq_build(uint8_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_pq_build
+
+#define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                        \
+  extern template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
+    raft::device_resources const& handle,                                                        \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                              \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx);                                            \
+                                                                                                 \
+  extern template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                        \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                              \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,            \
+    raft::neighbors::ivf_pq::index<IdxT>* idx);                                                  \
+                                                                                                 \
+  extern template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                                             \
+    const T* new_vectors,                                                                        \
+    const IdxT* new_indices,                                                                     \
+    IdxT n_rows)                                                                                 \
+    ->raft::neighbors::ivf_pq::index<IdxT>;                                                      \
+                                                                                                 \
+  extern template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                        \
+    raft::neighbors::ivf_pq::index<IdxT>* idx,                                                   \
+    const T* new_vectors,                                                                        \
+    const IdxT* new_indices,                                                                     \
+    IdxT n_rows);
+
+instantiate_raft_neighbors_ivf_pq_extend(float, int64_t);
+instantiate_raft_neighbors_ivf_pq_extend(int8_t, int64_t);
+instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_pq_extend
+
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
+  extern template void raft::neighbors::ivf_pq::search<T, IdxT>( \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
+    raft::device_matrix_view<float, IdxT, row_major> distances); \
+                                                                 \
+  extern template void raft::neighbors::ivf_pq::search<T, IdxT>( \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    const T* queries,                                            \
+    uint32_t n_queries,                                          \
+    uint32_t k,                                                  \
+    IdxT* neighbors,                                             \
+    float* distances,                                            \
+    rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_neighbors_ivf_pq_search(float, int64_t);
+instantiate_raft_neighbors_ivf_pq_search(int8_t, int64_t);
+instantiate_raft_neighbors_ivf_pq_search(uint8_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_pq_search
diff --git a/cpp/include/raft/neighbors/ivf_pq-inl.cuh b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
new file mode 100644
index 0000000000..dfc24e8214
--- /dev/null
+++ b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/neighbors/detail/ivf_pq_build.cuh>
+#include <raft/neighbors/detail/ivf_pq_search.cuh>
+#include <raft/neighbors/ivf_pq_serialize.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+namespace raft::neighbors::ivf_pq {
+
+/**
+ * @defgroup ivf_pq IVF PQ Algorithm
+ * @{
+ */
+
+/**
+ * @brief Build the index from the dataset for efficient search.
+ *
+ * NB: Currently, the following distance metrics are supported:
+ * - L2Expanded
+ * - L2Unexpanded
+ * - InnerProduct
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle
+ * @param[in] params configure the index building
+ * @param[in] dataset a device matrix view to a row-major matrix [n_rows, dim]
+ *
+ * @return the constructed ivf-pq index
+ */
+template <typename T, typename IdxT = uint32_t>
+index<IdxT> build(raft::device_resources const& handle,
+                  const index_params& params,
+                  raft::device_matrix_view<const T, IdxT, row_major> dataset)
+{
+  IdxT n_rows = dataset.extent(0);
+  IdxT dim    = dataset.extent(1);
+  return detail::build(handle, params, dataset.data_handle(), n_rows, dim);
+}
+
+/**
+ * @brief Extend the index with the new data.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle
+ * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()]
+ * @param[in] new_indices a device vector view to a vector of indices [n_rows].
+ *    If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[in] idx the original index; it is left unmodified and an extended index is returned
+ */
+template <typename T, typename IdxT>
+index<IdxT> extend(raft::device_resources const& handle,
+                   raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+                   std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
+                   const index<IdxT>& idx)
+{
+  ASSERT(new_vectors.extent(1) == idx.dim(),
+         "new_vectors should have the same dimension as the index");
+
+  IdxT n_rows = new_vectors.extent(0);
+  if (new_indices.has_value()) {
+    ASSERT(n_rows == new_indices.value().extent(0),
+           "new_vectors and new_indices have different number of rows");
+  }
+
+  return detail::extend(handle,
+                        idx,
+                        new_vectors.data_handle(),
+                        new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+                        n_rows);
+}
+
+/**
+ * @brief Extend the index with the new data.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle
+ * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()]
+ * @param[in] new_indices a device vector view to a vector of indices [n_rows].
+ *    If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[inout] idx pointer to the index; it is overwritten with the extended index
+ */
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
+            std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
+            index<IdxT>* idx)
+{
+  ASSERT(new_vectors.extent(1) == idx->dim(),
+         "new_vectors should have the same dimension as the index");
+
+  IdxT n_rows = new_vectors.extent(0);
+  if (new_indices.has_value()) {
+    ASSERT(n_rows == new_indices.value().extent(0),
+           "new_vectors and new_indices have different number of rows");
+  }
+
+  *idx = detail::extend(handle,
+                        *idx,
+                        new_vectors.data_handle(),
+                        new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+                        n_rows);
+}
+
+/**
+ * @brief Search ANN using the constructed index.
+ *
+ * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example.
+ *
+ * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
+ * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
+ * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
+ * eliminate entirely allocations happening within `search`.
+ * The exact size of the temporary buffer depends on multiple factors and is an implementation
+ * detail. However, you can safely specify a small initial size for the memory pool, so that only a
+ * few allocations happen to grow it during the first invocations of the `search`.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices
+ *
+ * @param[in] handle
+ * @param[in] params configure the search
+ * @param[in] idx ivf-pq constructed index
+ * @param[in] queries a device matrix view to a row-major matrix [n_queries, idx.dim()]
+ * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset
+ * [n_queries, k]
+ * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries,
+ * k]
+ */
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const index<IdxT>& idx,
+            raft::device_matrix_view<const T, IdxT, row_major> queries,
+            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+            raft::device_matrix_view<float, IdxT, row_major> distances)
+{
+  RAFT_EXPECTS(
+    queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0),
+    "Number of rows in output neighbors and distances matrices must equal the number of queries.");
+
+  RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1),
+               "Number of columns in output neighbors and distances matrices must equal k");
+
+  RAFT_EXPECTS(queries.extent(1) == idx.dim(),
+               "Number of query dimensions should equal number of dimensions in the index.");
+
+  std::uint32_t k = neighbors.extent(1);
+  return detail::search(handle,
+                        params,
+                        idx,
+                        queries.data_handle(),
+                        static_cast<std::uint32_t>(queries.extent(0)),
+                        k,
+                        neighbors.data_handle(),
+                        distances.data_handle(),
+                        handle.get_workspace_resource());
+}
+
+/** @} */  // end group ivf_pq
+
+/**
+ * @brief Build the index from the dataset for efficient search.
+ *
+ * NB: Currently, the following distance metrics are supported:
+ * - L2Expanded
+ * - L2Unexpanded
+ * - InnerProduct
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   // use default index parameters
+ *   ivf_pq::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = ivf_pq::build(handle, index_params, dataset, N, D);
+ *   // use default search parameters
+ *   ivf_pq::search_params search_params;
+ *   // search K nearest neighbours for each of the N queries
+ *   ivf_pq::search(handle, search_params, index, queries, N, K, out_inds, out_dists);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle
+ * @param[in] params configure the index building
+ * @param[in] dataset a device/host pointer to a row-major matrix [n_rows, dim]
+ * @param[in] n_rows the number of samples
+ * @param[in] dim the dimensionality of the data
+ *
+ * @return the constructed ivf-pq index
+ */
+template <typename T, typename IdxT = uint32_t>
+auto build(raft::device_resources const& handle,
+           const index_params& params,
+           const T* dataset,
+           IdxT n_rows,
+           uint32_t dim) -> index<IdxT>
+{
+  return detail::build(handle, params, dataset, n_rows, dim);
+}
+
+/**
+ * @brief Build a new index containing the data of the original plus new extra vectors.
+ *
+ * Implementation note:
+ *    The new data is clustered according to existing kmeans clusters, the cluster
+ *    centers are unchanged.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   ivf_pq::index_params index_params;
+ *   index_params.add_data_on_build = false;      // don't populate index on build
+ *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
+ *   // train the index from a [N, D] dataset
+ *   auto index_empty = ivf_pq::build(handle, index_params, dataset, N, D);
+ *   // fill the index with the data
+ *   auto index = ivf_pq::extend(handle, index_empty, dataset, nullptr, N);
+ * @endcode
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle
+ * @param[in] idx original index
+ * @param[in] new_vectors a device/host pointer to a row-major matrix [n_rows, idx.dim()]
+ * @param[in] new_indices a device/host pointer to a vector of indices [n_rows].
+ *    If the original index is empty (`idx.size() == 0`), you can pass `nullptr`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[in] n_rows the number of samples
+ *
+ * @return the constructed extended ivf-pq index
+ */
+template <typename T, typename IdxT>
+auto extend(raft::device_resources const& handle,
+            const index<IdxT>& idx,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows) -> index<IdxT>
+{
+  return detail::extend(handle, idx, new_vectors, new_indices, n_rows);
+}
+
+/**
+ * @brief Extend the index with the new data.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] handle
+ * @param[inout] idx
+ * @param[in] new_vectors a device/host pointer to a row-major matrix [n_rows, idx.dim()]
+ * @param[in] new_indices a device/host pointer to a vector of indices [n_rows].
+ *    If the original index is empty (`idx.size() == 0`), you can pass `nullptr`
+ *    here to imply a continuous range `[0...n_rows)`.
+ * @param[in] n_rows the number of samples
+ */
+template <typename T, typename IdxT>
+void extend(raft::device_resources const& handle,
+            index<IdxT>* idx,
+            const T* new_vectors,
+            const IdxT* new_indices,
+            IdxT n_rows)
+{
+  detail::extend(handle, idx, new_vectors, new_indices, n_rows);
+}
+
+/**
+ * @brief Search ANN using the constructed index.
+ *
+ * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example.
+ *
+ * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
+ * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
+ * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
+ * eliminate entirely allocations happening within `search`:
+ * @code{.cpp}
+ *   ...
+ *   // Create a pooling memory resource with a pre-defined initial size.
+ *   rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> mr(
+ *     rmm::mr::get_current_device_resource(), 1024 * 1024);
+ *   // use default search parameters
+ *   ivf_pq::search_params search_params;
+ *   // Use the same allocator across multiple searches to reduce the number of
+ *   // cuda memory allocations
+ *   ivf_pq::search(handle, search_params, index, queries1, N1, K, out_inds1, out_dists1, &mr);
+ *   ivf_pq::search(handle, search_params, index, queries2, N2, K, out_inds2, out_dists2, &mr);
+ *   ivf_pq::search(handle, search_params, index, queries3, N3, K, out_inds3, out_dists3, &mr);
+ *   ...
+ * @endcode
+ * The exact size of the temporary buffer depends on multiple factors and is an implementation
+ * detail. However, you can safely specify a small initial size for the memory pool, so that only a
+ * few allocations happen to grow it during the first invocations of the `search`.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices
+ *
+ * @param[in] handle
+ * @param[in] params configure the search
+ * @param[in] idx ivf-pq constructed index
+ * @param[in] queries a device pointer to a row-major matrix [n_queries, idx.dim()]
+ * @param[in] n_queries the batch size
+ * @param[in] k the number of neighbors to find for each query.
+ * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
+ * [n_queries, k]
+ * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
+ * @param[in] mr an optional memory resource to use across the searches (you can provide a large
+ * enough memory pool here to avoid memory allocations within search).
+ */
+template <typename T, typename IdxT>
+void search(raft::device_resources const& handle,
+            const search_params& params,
+            const index<IdxT>& idx,
+            const T* queries,
+            uint32_t n_queries,
+            uint32_t k,
+            IdxT* neighbors,
+            float* distances,
+            rmm::mr::device_memory_resource* mr = nullptr)
+{
+  return detail::search(handle, params, idx, queries, n_queries, k, neighbors, distances, mr);
+}
+
+}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/include/raft/neighbors/ivf_pq.cuh b/cpp/include/raft/neighbors/ivf_pq.cuh
index dfc24e8214..2d20638f00 100644
--- a/cpp/include/raft/neighbors/ivf_pq.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq.cuh
@@ -13,343 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/detail/ivf_pq_build.cuh>
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/ivf_pq_serialize.cuh>
-#include <raft/neighbors/ivf_pq_types.hpp>
-
-#include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/per_device_resource.hpp>
-
-namespace raft::neighbors::ivf_pq {
-
-/**
- * @defgroup ivf_pq IVF PQ Algorithm
- * @{
- */
-
-/**
- * @brief Build the index from the dataset for efficient search.
- *
- * NB: Currently, the following distance metrics are supported:
- * - L2Expanded
- * - L2Unexpanded
- * - InnerProduct
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] params configure the index building
- * @param[in] dataset a device matrix view to a row-major matrix [n_rows, dim]
- *
- * @return the constructed ivf-pq index
- */
-template <typename T, typename IdxT = uint32_t>
-index<IdxT> build(raft::device_resources const& handle,
-                  const index_params& params,
-                  raft::device_matrix_view<const T, IdxT, row_major> dataset)
-{
-  IdxT n_rows = dataset.extent(0);
-  IdxT dim    = dataset.extent(1);
-  return detail::build(handle, params, dataset.data_handle(), n_rows, dim);
-}
-
-/**
- * @brief Extend the index with the new data.
- * *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()]
- * @param[in] new_indices a device vector view to a vector of indices [n_rows].
- *    If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[inout] idx
- */
-template <typename T, typename IdxT>
-index<IdxT> extend(raft::device_resources const& handle,
-                   raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
-                   std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
-                   const index<IdxT>& idx)
-{
-  ASSERT(new_vectors.extent(1) == idx.dim(),
-         "new_vectors should have the same dimension as the index");
-
-  IdxT n_rows = new_vectors.extent(0);
-  if (new_indices.has_value()) {
-    ASSERT(n_rows == new_indices.value().extent(0),
-           "new_vectors and new_indices have different number of rows");
-  }
-
-  return detail::extend(handle,
-                        idx,
-                        new_vectors.data_handle(),
-                        new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
-                        n_rows);
-}
-
-/**
- * @brief Extend the index with the new data.
- * *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()]
- * @param[in] new_indices a device vector view to a vector of indices [n_rows].
- *    If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[inout] idx
- */
-template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
-            raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
-            std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
-            index<IdxT>* idx)
-{
-  ASSERT(new_vectors.extent(1) == idx->dim(),
-         "new_vectors should have the same dimension as the index");
-
-  IdxT n_rows = new_vectors.extent(0);
-  if (new_indices.has_value()) {
-    ASSERT(n_rows == new_indices.value().extent(0),
-           "new_vectors and new_indices have different number of rows");
-  }
-
-  *idx = detail::extend(handle,
-                        *idx,
-                        new_vectors.data_handle(),
-                        new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
-                        n_rows);
-}
-
-/**
- * @brief Search ANN using the constructed index.
- *
- * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example.
- *
- * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
- * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
- * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
- * eliminate entirely allocations happening within `search`.
- * The exact size of the temporary buffer depends on multiple factors and is an implementation
- * detail. However, you can safely specify a small initial size for the memory pool, so that only a
- * few allocations happen to grow it during the first invocations of the `search`.
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices
- *
- * @param[in] handle
- * @param[in] params configure the search
- * @param[in] idx ivf-pq constructed index
- * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()]
- * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset
- * [n_queries, k]
- * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries,
- * k]
- */
-template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
-            const search_params& params,
-            const index<IdxT>& idx,
-            raft::device_matrix_view<const T, IdxT, row_major> queries,
-            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
-            raft::device_matrix_view<float, IdxT, row_major> distances)
-{
-  RAFT_EXPECTS(
-    queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0),
-    "Number of rows in output neighbors and distances matrices must equal the number of queries.");
-
-  RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1),
-               "Number of columns in output neighbors and distances matrices must equal k");
-
-  RAFT_EXPECTS(queries.extent(1) == idx.dim(),
-               "Number of query dimensions should equal number of dimensions in the index.");
-
-  std::uint32_t k = neighbors.extent(1);
-  return detail::search(handle,
-                        params,
-                        idx,
-                        queries.data_handle(),
-                        static_cast<std::uint32_t>(queries.extent(0)),
-                        k,
-                        neighbors.data_handle(),
-                        distances.data_handle(),
-                        handle.get_workspace_resource());
-}
-
-/** @} */  // end group ivf_pq
-
-/**
- * @brief Build the index from the dataset for efficient search.
- *
- * NB: Currently, the following distance metrics are supported:
- * - L2Expanded
- * - L2Unexpanded
- * - InnerProduct
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   // use default index parameters
- *   ivf_pq::index_params index_params;
- *   // create and fill the index from a [N, D] dataset
- *   auto index = ivf_pq::build(handle, index_params, dataset, N, D);
- *   // use default search parameters
- *   ivf_pq::search_params search_params;
- *   // search K nearest neighbours for each of the N queries
- *   ivf_pq::search(handle, search_params, index, queries, N, K, out_inds, out_dists);
- * @endcode
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[in] params configure the index building
- * @param[in] dataset a device/host pointer to a row-major matrix [n_rows, dim]
- * @param[in] n_rows the number of samples
- * @param[in] dim the dimensionality of the data
- *
- * @return the constructed ivf-pq index
- */
-template <typename T, typename IdxT = uint32_t>
-auto build(raft::device_resources const& handle,
-           const index_params& params,
-           const T* dataset,
-           IdxT n_rows,
-           uint32_t dim) -> index<IdxT>
-{
-  return detail::build(handle, params, dataset, n_rows, dim);
-}
-
-/**
- * @brief Build a new index containing the data of the original plus new extra vectors.
- *
- * Implementation note:
- *    The new data is clustered according to existing kmeans clusters, the cluster
- *    centers are unchanged.
- *
- * Usage example:
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   ivf_pq::index_params index_params;
- *   index_params.add_data_on_build = false;      // don't populate index on build
- *   index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training
- *   // train the index from a [N, D] dataset
- *   auto index_empty = ivf_pq::build(handle, index_params, dataset, N, D);
- *   // fill the index with the data
- *   auto index = ivf_pq::extend(handle, index_empty, dataset, nullptr, N);
- * @endcode
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[inout] idx original index
- * @param[in] new_vectors a device/host pointer to a row-major matrix [n_rows, idx.dim()]
- * @param[in] new_indices a device/host pointer to a vector of indices [n_rows].
- *    If the original index is empty (`idx.size() == 0`), you can pass `nullptr`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[in] n_rows the number of samples
- *
- * @return the constructed extended ivf-pq index
- */
-template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
-            const index<IdxT>& idx,
-            const T* new_vectors,
-            const IdxT* new_indices,
-            IdxT n_rows) -> index<IdxT>
-{
-  return detail::extend(handle, idx, new_vectors, new_indices, n_rows);
-}
-
-/**
- * @brief Extend the index with the new data.
- * *
- * @tparam T data element type
- * @tparam IdxT type of the indices in the source dataset
- *
- * @param[in] handle
- * @param[inout] idx
- * @param[in] new_vectors a device/host pointer to a row-major matrix [n_rows, idx.dim()]
- * @param[in] new_indices a device/host pointer to a vector of indices [n_rows].
- *    If the original index is empty (`idx.size() == 0`), you can pass `nullptr`
- *    here to imply a continuous range `[0...n_rows)`.
- * @param[in] n_rows the number of samples
- */
-template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
-            index<IdxT>* idx,
-            const T* new_vectors,
-            const IdxT* new_indices,
-            IdxT n_rows)
-{
-  detail::extend(handle, idx, new_vectors, new_indices, n_rows);
-}
-
-/**
- * @brief Search ANN using the constructed index.
- *
- * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example.
- *
- * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
- * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
- * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
- * eliminate entirely allocations happening within `search`:
- * @code{.cpp}
- *   ...
- *   // Create a pooling memory resource with a pre-defined initial size.
- *   rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> mr(
- *     rmm::mr::get_current_device_resource(), 1024 * 1024);
- *   // use default search parameters
- *   ivf_pq::search_params search_params;
- *   // Use the same allocator across multiple searches to reduce the number of
- *   // cuda memory allocations
- *   ivf_pq::search(handle, search_params, index, queries1, N1, K, out_inds1, out_dists1, &mr);
- *   ivf_pq::search(handle, search_params, index, queries2, N2, K, out_inds2, out_dists2, &mr);
- *   ivf_pq::search(handle, search_params, index, queries3, N3, K, out_inds3, out_dists3, &mr);
- *   ...
- * @endcode
- * The exact size of the temporary buffer depends on multiple factors and is an implementation
- * detail. However, you can safely specify a small initial size for the memory pool, so that only a
- * few allocations happen to grow it during the first invocations of the `search`.
- *
- * @tparam T data element type
- * @tparam IdxT type of the indices
- *
- * @param[in] handle
- * @param[in] params configure the search
- * @param[in] idx ivf-pq constructed index
- * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()]
- * @param[in] n_queries the batch size
- * @param[in] k the number of neighbors to find for each query.
- * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset
- * [n_queries, k]
- * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
- * @param[in] mr an optional memory resource to use across the searches (you can provide a large
- * enough memory pool here to avoid memory allocations within search).
- */
-template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
-            const search_params& params,
-            const index<IdxT>& idx,
-            const T* queries,
-            uint32_t n_queries,
-            uint32_t k,
-            IdxT* neighbors,
-            float* distances,
-            rmm::mr::device_memory_resource* mr = nullptr)
-{
-  return detail::search(handle, params, idx, queries, n_queries, k, neighbors, distances, mr);
-}
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "ivf_pq-inl.cuh"
+#endif
 
-}  // namespace raft::neighbors::ivf_pq
+#ifdef RAFT_COMPILED
+#include "ivf_pq-ext.cuh"
+#endif
diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh
new file mode 100644
index 0000000000..0ba2d2c5ab
--- /dev/null
+++ b/cpp/include/raft/neighbors/refine-ext.cuh
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>                           // int64_t
+
+#include <raft/core/device_mdspan.hpp>       // raft::device_matrix_view
+#include <raft/core/device_resources.hpp>    // raft::device_resources
+#include <raft/core/host_mdspan.hpp>         // raft::host_matrix_view
+#include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
+#include <raft/util/raft_explicit.hpp>       // RAFT_EXPLICIT
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+namespace raft::neighbors {
+
+template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
+void refine(raft::device_resources const& handle,
+            raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,
+            raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,
+            raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
+            raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
+            raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,
+            raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded)
+  RAFT_EXPLICIT;
+
+template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
+void refine(raft::device_resources const& handle,
+            raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,
+            raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,
+            raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
+            raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,
+            raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,
+            raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded)
+  RAFT_EXPLICIT;
+
+}  // namespace raft::neighbors
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)       \
+  extern template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>( \
+    raft::device_resources const& handle,                                              \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,             \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,             \
+    raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,  \
+    raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                    \
+    raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,             \
+    raft::distance::DistanceType metric);                                              \
+                                                                                       \
+  extern template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>( \
+    raft::device_resources const& handle,                                              \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,               \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,               \
+    raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,    \
+    raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,                      \
+    raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,               \
+    raft::distance::DistanceType metric);
+
+instantiate_raft_neighbors_refine(int64_t, float, float, int64_t);
+instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t);
+instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t);
+
+#undef instantiate_raft_neighbors_refine
diff --git a/cpp/include/raft/neighbors/refine-inl.cuh b/cpp/include/raft/neighbors/refine-inl.cuh
new file mode 100644
index 0000000000..4243d7e723
--- /dev/null
+++ b/cpp/include/raft/neighbors/refine-inl.cuh
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/core/host_mdspan.hpp>
+#include <raft/matrix/matrix.cuh>
+#include <raft/neighbors/detail/refine.cuh>
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
+namespace raft::neighbors {
+
+/**
+ * @defgroup ann_refine Approximate Nearest Neighbors Refinement
+ * @{
+ */
+
+/**
+ * @brief Refine nearest neighbor search.
+ *
+ * Refinement is an operation that follows an approximate NN search. The approximate search has
+ * already selected n_candidates neighbor candidates for each query. We narrow it down to k
+ * neighbors. For each query, we calculate the exact distance between the query and its
+ * n_candidates neighbor candidates, and select the k nearest ones.
+ *
+ * The k nearest neighbors and distances are returned.
+ *
+ * Example usage
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   // use default index parameters
+ *   ivf_pq::index_params index_params;
+ *   // create and fill the index from a [N, D] dataset
+ *   auto index = ivf_pq::build(handle, index_params, dataset, N, D);
+ *   // use default search parameters
+ *   ivf_pq::search_params search_params;
+ *   // search m = 4 * k nearest neighbours for each of the N queries
+ *   ivf_pq::search(handle, search_params, index, queries, N, 4 * k, neighbor_candidates,
+ *                  out_dists_tmp);
+ *   // refine the candidates down to the k nearest ones
+ *   refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists,
+ *           index.metric());
+ * @endcode
+ *
+ *
+ * @param[in] handle the raft handle
+ * @param[in] dataset device matrix that stores the dataset [n_rows, dims]
+ * @param[in] queries device matrix of the queries [n_queries, dims]
+ * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where
+ *   n_candidates >= k
+ * @param[out] indices device matrix that stores the refined indices [n_queries, k]
+ * @param[out] distances device matrix that stores the refined distances [n_queries, k]
+ * @param[in] metric distance metric to use (defaults to DistanceType::L2Unexpanded)
+ */
+template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
+void refine(raft::device_resources const& handle,
+            raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,
+            raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,
+            raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
+            raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
+            raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,
+            distance::DistanceType metric = distance::DistanceType::L2Unexpanded)
+{
+  detail::refine_device(handle, dataset, queries, neighbor_candidates, indices, distances, metric);
+}
+
+/** @brief Refine nearest neighbor search with all input and output data in host memory.
+ * @param[in] handle the raft handle (not forwarded to detail::refine_host)
+ * @param[in] dataset host matrix that stores the dataset [n_rows, dims]
+ * @param[in] queries host matrix of the queries [n_queries, dims]
+ * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries,
+ *   n_candidates], where n_candidates >= k
+ * @param[out] indices host matrix that stores the refined indices [n_queries, k]
+ * @param[out] distances host matrix that stores the refined distances [n_queries, k]
+ * @param[in] metric distance metric to use (defaults to DistanceType::L2Unexpanded)
+ */
+template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
+void refine(raft::device_resources const& handle,
+            raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,
+            raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,
+            raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
+            raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,
+            raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,
+            distance::DistanceType metric = distance::DistanceType::L2Unexpanded)
+{
+  detail::refine_host(dataset, queries, neighbor_candidates, indices, distances, metric);
+}
+
+/** @} */  // end group ann_refine
+}  // namespace raft::neighbors
diff --git a/cpp/include/raft/neighbors/refine.cuh b/cpp/include/raft/neighbors/refine.cuh
index 4243d7e723..15f2b02928 100644
--- a/cpp/include/raft/neighbors/refine.cuh
+++ b/cpp/include/raft/neighbors/refine.cuh
@@ -13,93 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
-#include <raft/core/host_mdspan.hpp>
-#include <raft/matrix/matrix.cuh>
-#include <raft/neighbors/detail/refine.cuh>
-#include <raft/spatial/knn/detail/ann_utils.cuh>
-
-namespace raft::neighbors {
-
-/**
- * @defgroup ann_refine Approximate Nearest Neighbors Refinement
- * @{
- */
-
-/**
- * @brief Refine nearest neighbor search.
- *
- * Refinement is an operation that follows an approximate NN search. The approximate search has
- * already selected n_candidates neighbor candidates for each query. We narrow it down to k
- * neighbors. For each query, we calculate the exact distance between the query and its
- * n_candidates neighbor candidate, and select the k nearest ones.
- *
- * The k nearest neighbors and distances are returned.
- *
- * Example usage
- * @code{.cpp}
- *   using namespace raft::neighbors;
- *   // use default index parameters
- *   ivf_pq::index_params index_params;
- *   // create and fill the index from a [N, D] dataset
- *   auto index = ivf_pq::build(handle, index_params, dataset, N, D);
- *   // use default search parameters
- *   ivf_pq::search_params search_params;
- *   // search m = 4 * k nearest neighbours for each of the N queries
- *   ivf_pq::search(handle, search_params, index, queries, N, 4 * k, neighbor_candidates,
- *                  out_dists_tmp);
- *   // refine it to the k nearest one
- *   refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists,
- *           index.metric());
- * @endcode
- *
- *
- * @param[in] handle the raft handle
- * @param[in] dataset device matrix that stores the dataset [n_rows, dims]
- * @param[in] queries device matrix of the queries [n_queris, dims]
- * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where
- *   n_candidates >= k
- * @param[out] indices device matrix that stores the refined indices [n_queries, k]
- * @param[out] distances device matrix that stores the refined distances [n_queries, k]
- * @param[in] metric distance metric to use. Euclidean (L2) is used by default
- */
-template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
-void refine(raft::device_resources const& handle,
-            raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,
-            raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,
-            raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
-            raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
-            raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,
-            distance::DistanceType metric = distance::DistanceType::L2Unexpanded)
-{
-  detail::refine_device(handle, dataset, queries, neighbor_candidates, indices, distances, metric);
-}
-
-/** Same as above, but all input and out data is in host memory.
- * @param[in] handle the raft handle
- * @param[in] dataset host matrix that stores the dataset [n_rows, dims]
- * @param[in] queries host matrix of the queries [n_queris, dims]
- * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries,
- *   n_candidates], where n_candidates >= k
- * @param[out] indices host matrix that stores the refined indices [n_queries, k]
- * @param[out] distances host matrix that stores the refined distances [n_queries, k]
- * @param[in] metric distance metric to use. Euclidean (L2) is used by default
- */
-template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
-void refine(raft::device_resources const& handle,
-            raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,
-            raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,
-            raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
-            raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,
-            raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,
-            distance::DistanceType metric = distance::DistanceType::L2Unexpanded)
-{
-  detail::refine_host(dataset, queries, neighbor_candidates, indices, distances, metric);
-}
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "refine-inl.cuh"
+#endif
 
-/** @} */  // end group ann_refine
-}  // namespace raft::neighbors
+#ifdef RAFT_COMPILED
+#include "refine-ext.cuh"
+#endif
diff --git a/cpp/include/raft/neighbors/specializations.cuh b/cpp/include/raft/neighbors/specializations.cuh
index 9da5649ef8..ed0b6848ae 100644
--- a/cpp/include/raft/neighbors/specializations.cuh
+++ b/cpp/include/raft/neighbors/specializations.cuh
@@ -13,17 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/specializations/ball_cover.cuh>
-#include <raft/neighbors/specializations/brute_force.cuh>
-#include <raft/neighbors/specializations/fused_l2_knn.cuh>
-
-#include <raft/neighbors/specializations/ivf_flat.cuh>
-#include <raft/neighbors/specializations/ivf_pq.cuh>
-#include <raft/neighbors/specializations/refine.cuh>
-
-#include <raft/cluster/specializations.cuh>
-#include <raft/distance/specializations.cuh>
-#include <raft/matrix/specializations.cuh>
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/specializations/ball_cover.cuh b/cpp/include/raft/neighbors/specializations/ball_cover.cuh
index d6a6b2e296..ed0b6848ae 100644
--- a/cpp/include/raft/neighbors/specializations/ball_cover.cuh
+++ b/cpp/include/raft/neighbors/specializations/ball_cover.cuh
@@ -13,41 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/ball_cover.cuh>
-#include <raft/neighbors/ball_cover_types.hpp>
-#include <raft/neighbors/specializations/detail/ball_cover_lowdim.hpp>
-
-#include <cstdint>
-
-namespace raft::neighbors::ball_cover {
-extern template class BallCoverIndex<int, float, std::uint32_t, std::uint32_t>;
-extern template class BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>;
-
-extern template void build_index<std::int64_t, float, std::uint32_t, std::uint32_t>(
-  raft::device_resources const& handle,
-  BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>& index);
-
-extern template void knn_query<std::int64_t, float, std::uint32_t>(
-  raft::device_resources const& handle,
-  const BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>& index,
-  std::uint32_t k,
-  const float* query,
-  std::uint32_t n_query_pts,
-  std::int64_t* inds,
-  float* dists,
-  bool perform_post_filtering,
-  float weight);
-
-extern template void all_knn_query<std::int64_t, float, std::uint32_t, std::uint32_t>(
-  raft::device_resources const& handle,
-  BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>& index,
-  std::uint32_t k,
-  std::int64_t* inds,
-  float* dists,
-  bool perform_post_filtering,
-  float weight);
-
-};  // namespace raft::neighbors::ball_cover
\ No newline at end of file
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/specializations/brute_force.cuh b/cpp/include/raft/neighbors/specializations/brute_force.cuh
index 1337beb68a..ed0b6848ae 100644
--- a/cpp/include/raft/neighbors/specializations/brute_force.cuh
+++ b/cpp/include/raft/neighbors/specializations/brute_force.cuh
@@ -13,34 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/brute_force.cuh>
-
-// also define the detail api, which is used by raft::neighbors::brute_force
-// (not doing the public api, since has extra template params on index_layout, matrix_index,
-// search_layout etc - and isn't clear what the defaults here should be)
-namespace raft::neighbors::detail {
-#define RAFT_INST(IdxT, T, IntT)                                                                 \
-  extern template void brute_force_knn_impl<IntT, IdxT, T>(raft::device_resources const& handle, \
-                                                           std::vector<T*>& input,               \
-                                                           std::vector<IntT>& sizes,             \
-                                                           IntT D,                               \
-                                                           T* search_items,                      \
-                                                           IntT n,                               \
-                                                           IdxT* res_I,                          \
-                                                           T* res_D,                             \
-                                                           IntT k,                               \
-                                                           bool rowMajorIndex,                   \
-                                                           bool rowMajorQuery,                   \
-                                                           std::vector<IdxT>* translations,      \
-                                                           raft::distance::DistanceType metric,  \
-                                                           float metricArg,                      \
-                                                           raft::identity_op);
-RAFT_INST(long, float, int);
-RAFT_INST(long, float, unsigned int);
-RAFT_INST(uint32_t, float, int);
-RAFT_INST(uint32_t, float, unsigned int);
-#undef RAFT_INST
-}  // namespace raft::neighbors::detail
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/specializations/detail/ivf_pq_compute_similarity.cuh b/cpp/include/raft/neighbors/specializations/detail/ivf_pq_compute_similarity.cuh
index f1c46b1225..9588a7f329 100644
--- a/cpp/include/raft/neighbors/specializations/detail/ivf_pq_compute_similarity.cuh
+++ b/cpp/include/raft/neighbors/specializations/detail/ivf_pq_compute_similarity.cuh
@@ -13,38 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-namespace {
-using fp8s_t = fp_8bit<5, true>;
-using fp8u_t = fp_8bit<5, false>;
-}  // namespace
-
-#define RAFT_INST(OutT, LutT)                                                                     \
-  extern template auto get_compute_similarity_kernel<OutT, LutT, true, true>(uint32_t, uint32_t)  \
-    ->compute_similarity_kernel_t<OutT, LutT>;                                                    \
-  extern template auto get_compute_similarity_kernel<OutT, LutT, true, false>(uint32_t, uint32_t) \
-    ->compute_similarity_kernel_t<OutT, LutT>;                                                    \
-  extern template auto get_compute_similarity_kernel<OutT, LutT, false, true>(uint32_t, uint32_t) \
-    ->compute_similarity_kernel_t<OutT, LutT>;
-
-#define RAFT_INST_ALL_OUT_T(LutT) \
-  RAFT_INST(float, LutT)          \
-  RAFT_INST(half, LutT)
-
-RAFT_INST_ALL_OUT_T(float)
-RAFT_INST_ALL_OUT_T(half)
-RAFT_INST_ALL_OUT_T(fp8s_t)
-RAFT_INST_ALL_OUT_T(fp8u_t)
-
-#undef RAFT_INST
-#undef RAFT_INST_ALL_OUT_T
-
-}  // namespace raft::neighbors::ivf_pq::detail
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/specializations/fused_l2_knn.cuh b/cpp/include/raft/neighbors/specializations/fused_l2_knn.cuh
index 916db8f0a2..ed0b6848ae 100644
--- a/cpp/include/raft/neighbors/specializations/fused_l2_knn.cuh
+++ b/cpp/include/raft/neighbors/specializations/fused_l2_knn.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,68 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#pragma once
 
-#include <cstdint>
-#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-extern template void fusedL2Knn<long, float, true>(size_t D,
-                                                   long* out_inds,
-                                                   float* out_dists,
-                                                   const float* index,
-                                                   const float* query,
-                                                   size_t n_index_rows,
-                                                   size_t n_query_rows,
-                                                   int k,
-                                                   bool rowMajorIndex,
-                                                   bool rowMajorQuery,
-                                                   cudaStream_t stream,
-                                                   raft::distance::DistanceType metric);
-
-extern template void fusedL2Knn<long, float, false>(size_t D,
-                                                    long* out_inds,
-                                                    float* out_dists,
-                                                    const float* index,
-                                                    const float* query,
-                                                    size_t n_index_rows,
-                                                    size_t n_query_rows,
-                                                    int k,
-                                                    bool rowMajorIndex,
-                                                    bool rowMajorQuery,
-                                                    cudaStream_t stream,
-                                                    raft::distance::DistanceType metric);
-
-extern template void fusedL2Knn<int, float, true>(size_t D,
-                                                  int* out_inds,
-                                                  float* out_dists,
-                                                  const float* index,
-                                                  const float* query,
-                                                  size_t n_index_rows,
-                                                  size_t n_query_rows,
-                                                  int k,
-                                                  bool rowMajorIndex,
-                                                  bool rowMajorQuery,
-                                                  cudaStream_t stream,
-                                                  raft::distance::DistanceType metric);
-
-extern template void fusedL2Knn<int, float, false>(size_t D,
-                                                   int* out_inds,
-                                                   float* out_dists,
-                                                   const float* index,
-                                                   const float* query,
-                                                   size_t n_index_rows,
-                                                   size_t n_query_rows,
-                                                   int k,
-                                                   bool rowMajorIndex,
-                                                   bool rowMajorQuery,
-                                                   cudaStream_t stream,
-                                                   raft::distance::DistanceType metric);
-
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/specializations/ivf_flat.cuh b/cpp/include/raft/neighbors/specializations/ivf_flat.cuh
index 161f3462c9..ac3b80e8d9 100644
--- a/cpp/include/raft/neighbors/specializations/ivf_flat.cuh
+++ b/cpp/include/raft/neighbors/specializations/ivf_flat.cuh
@@ -13,65 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/ivf_flat.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-// greppable-id-specializations-ivf-flat-search: The ivfflat_interleaved_scan
-// function is used in both raft::neighbors::ivf_flat::search and
-// raft::neighbors::detail::refine_device. To prevent a duplicate instantiation
-// of this function (which defines ~270 kernels) in the refine specializations,
-// an extern template definition is provided here. Please check related function
-// calls after editing template definition below. Search for
-// `greppable-id-specializations-ivf-flat-search` to find them.
-#define RAFT_INST(T, IdxT)                                                               \
-  extern template auto build(raft::device_resources const& handle,                       \
-                             const index_params& params,                                 \
-                             raft::device_matrix_view<const T, IdxT, row_major> dataset) \
-    ->index<T, IdxT>;                                                                    \
-                                                                                         \
-  extern template auto extend(                                                           \
-    raft::device_resources const& handle,                                                \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                      \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,               \
-    const index<T, IdxT>& orig_index)                                                    \
-    ->index<T, IdxT>;                                                                    \
-                                                                                         \
-  extern template void extend(                                                           \
-    raft::device_resources const& handle,                                                \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                      \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,               \
-    raft::neighbors::ivf_flat::index<T, IdxT>* idx);                                     \
-                                                                                         \
-  extern template void search(raft::device_resources const&,                             \
-                              raft::neighbors::ivf_flat::search_params const&,           \
-                              const raft::neighbors::ivf_flat::index<T, IdxT>&,          \
-                              raft::device_matrix_view<const T, IdxT, row_major>,        \
-                              raft::device_matrix_view<IdxT, IdxT, row_major>,           \
-                              raft::device_matrix_view<float, IdxT, row_major>);         \
-                                                                                         \
-  extern template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan<      \
-    T,                                                                                   \
-    typename raft::spatial::knn::detail::utils::config<T>::value_t,                      \
-    IdxT>(const index<T, IdxT>& index,                                                   \
-          const T* queries,                                                              \
-          const uint32_t* coarse_query_results,                                          \
-          const uint32_t n_queries,                                                      \
-          const raft::distance::DistanceType metric,                                     \
-          const uint32_t n_probes,                                                       \
-          const uint32_t k,                                                              \
-          const bool select_min,                                                         \
-          IdxT* neighbors,                                                               \
-          float* distances,                                                              \
-          uint32_t& grid_dim_x,                                                          \
-          rmm::cuda_stream_view stream);
-
-RAFT_INST(float, int64_t);
-RAFT_INST(int8_t, int64_t);
-RAFT_INST(uint8_t, int64_t);
-
-#undef RAFT_INST
-}  // namespace raft::neighbors::ivf_flat
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/specializations/ivf_pq.cuh b/cpp/include/raft/neighbors/specializations/ivf_pq.cuh
index 9209f5095d..9588a7f329 100644
--- a/cpp/include/raft/neighbors/specializations/ivf_pq.cuh
+++ b/cpp/include/raft/neighbors/specializations/ivf_pq.cuh
@@ -13,63 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/cluster/specializations.cuh>
-#include <raft/distance/specializations.cuh>
-#include <raft/matrix/specializations.cuh>
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations/detail/ivf_pq_compute_similarity.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#ifdef RAFT_DECL_BUILD_EXTEND
-#undef RAFT_DECL_BUILD_EXTEND
-#endif
-
-#ifdef RAFT_DECL_SEARCH
-#undef RAFT_DECL_SEARCH
-#endif
-
-// We define overloads for build and extend with void return type. This is used in the Cython
-// wrappers, where exception handling is not compatible with return type that has nontrivial
-// constructor.
-#define RAFT_DECL_BUILD_EXTEND(T, IdxT)                                                  \
-  extern template auto build(raft::device_resources const&,                              \
-                             const raft::neighbors::ivf_pq::index_params&,               \
-                             raft::device_matrix_view<const T, IdxT, row_major>)         \
-    ->raft::neighbors::ivf_pq::index<IdxT>;                                              \
-                                                                                         \
-  extern template auto extend(raft::device_resources const&,                             \
-                              raft::device_matrix_view<const T, IdxT, row_major>,        \
-                              std::optional<raft::device_vector_view<const IdxT, IdxT>>, \
-                              const raft::neighbors::ivf_pq::index<IdxT>&)               \
-    ->raft::neighbors::ivf_pq::index<IdxT>;                                              \
-                                                                                         \
-  extern template void extend(raft::device_resources const&,                             \
-                              raft::device_matrix_view<const T, IdxT, row_major>,        \
-                              std::optional<raft::device_vector_view<const IdxT, IdxT>>, \
-                              raft::neighbors::ivf_pq::index<IdxT>*);
-
-RAFT_DECL_BUILD_EXTEND(float, int64_t)
-RAFT_DECL_BUILD_EXTEND(int8_t, int64_t)
-RAFT_DECL_BUILD_EXTEND(uint8_t, int64_t)
-
-#undef RAFT_DECL_BUILD_EXTEND
-
-#define RAFT_DECL_SEARCH(T, IdxT)                                                 \
-  extern template void search(raft::device_resources const&,                      \
-                              const raft::neighbors::ivf_pq::search_params&,      \
-                              const raft::neighbors::ivf_pq::index<IdxT>&,        \
-                              raft::device_matrix_view<const T, IdxT, row_major>, \
-                              raft::device_matrix_view<IdxT, IdxT, row_major>,    \
-                              raft::device_matrix_view<float, IdxT, row_major>);
-
-RAFT_DECL_SEARCH(float, int64_t);
-RAFT_DECL_SEARCH(int8_t, int64_t);
-RAFT_DECL_SEARCH(uint8_t, int64_t);
-
-#undef RAFT_DECL_SEARCH
-
-}  // namespace raft::neighbors::ivf_pq
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/neighbors/specializations/refine.cuh b/cpp/include/raft/neighbors/specializations/refine.cuh
index aef4834c9f..9588a7f329 100644
--- a/cpp/include/raft/neighbors/specializations/refine.cuh
+++ b/cpp/include/raft/neighbors/specializations/refine.cuh
@@ -13,39 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/refine.cuh>
-
-namespace raft::neighbors {
-
-#ifdef RAFT_INST
-#undef RAFT_INST
-#endif
-
-#define RAFT_INST(T, IdxT)                                                        \
-  extern template void refine<IdxT, T, float, int64_t>(                           \
-    raft::device_resources const& handle,                                         \
-    raft::device_matrix_view<const T, int64_t, row_major> dataset,                \
-    raft::device_matrix_view<const T, int64_t, row_major> queries,                \
-    raft::device_matrix_view<const IdxT, int64_t, row_major> neighbor_candidates, \
-    raft::device_matrix_view<IdxT, int64_t, row_major> indices,                   \
-    raft::device_matrix_view<float, int64_t, row_major> distances,                \
-    distance::DistanceType metric);                                               \
-                                                                                  \
-  extern template void refine<IdxT, T, float, int64_t>(                           \
-    raft::device_resources const& handle,                                         \
-    raft::host_matrix_view<const T, int64_t, row_major> dataset,                  \
-    raft::host_matrix_view<const T, int64_t, row_major> queries,                  \
-    raft::host_matrix_view<const IdxT, int64_t, row_major> neighbor_candidates,   \
-    raft::host_matrix_view<IdxT, int64_t, row_major> indices,                     \
-    raft::host_matrix_view<float, int64_t, row_major> distances,                  \
-    distance::DistanceType metric);
-
-RAFT_INST(float, int64_t);
-RAFT_INST(uint8_t, int64_t);
-RAFT_INST(int8_t, int64_t);
-
-#undef RAFT_INST
-}  // namespace raft::neighbors
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/sparse/neighbors/specializations.cuh b/cpp/include/raft/sparse/neighbors/specializations.cuh
index 23ba38ccda..9588a7f329 100644
--- a/cpp/include/raft/sparse/neighbors/specializations.cuh
+++ b/cpp/include/raft/sparse/neighbors/specializations.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/distance/specializations.cuh>
-#include <raft/neighbors/specializations.cuh>
\ No newline at end of file
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
index e489f24242..dd291251b4 100644
--- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
@@ -17,7 +17,6 @@
 #pragma once
 
 #include <raft/core/logger.hpp>
-#include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh
index 0a6718f5a5..ce72b2648f 100644
--- a/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include "../haversine_distance.cuh"
+#include "registers_types.cuh"
 #include <cstdint>
 #include <thrust/functional.h>
 #include <thrust/tuple.h>
@@ -39,42 +40,6 @@ struct NNComp {
   }
 };
 
-template <typename value_t, typename value_int = std::uint32_t>
-struct DistFunc {
-  virtual __device__ __host__ __forceinline__ value_t operator()(const value_t* a,
-                                                                 const value_t* b,
-                                                                 const value_int n_dims)
-  {
-    return -1;
-  };
-};
-
-template <typename value_t, typename value_int = std::uint32_t>
-struct HaversineFunc : public DistFunc<value_t, value_int> {
-  __device__ __host__ __forceinline__ value_t operator()(const value_t* a,
-                                                         const value_t* b,
-                                                         const value_int n_dims) override
-  {
-    return raft::spatial::knn::detail::compute_haversine(a[0], b[0], a[1], b[1]);
-  }
-};
-
-template <typename value_t, typename value_int = std::uint32_t>
-struct EuclideanFunc : public DistFunc<value_t, value_int> {
-  __device__ __host__ __forceinline__ value_t operator()(const value_t* a,
-                                                         const value_t* b,
-                                                         const value_int n_dims) override
-  {
-    value_t sum_sq = 0;
-    for (value_int i = 0; i < n_dims; ++i) {
-      value_t diff = a[i] - b[i];
-      sum_sq += diff * diff;
-    }
-
-    return raft::sqrt(sum_sq);
-  }
-};
-
 /**
  * Zeros the bit at location h in a one-hot encoded 32-bit int array
  */
@@ -105,4 +70,4 @@ __device__ inline bool _get_val(std::uint32_t* arr, std::uint32_t h)
 };  // namespace detail
 };  // namespace knn
 };  // namespace spatial
-};  // namespace raft
\ No newline at end of file
+};  // namespace raft
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh
new file mode 100644
index 0000000000..efe1a8a70b
--- /dev/null
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "../../ball_cover_types.hpp"   // BallCoverIndex
+#include "registers_types.cuh"          // DistFunc
+#include <cstdint>                      // uint32_t
+#include <raft/util/raft_explicit.hpp>  //RAFT_EXPLICIT
+
+#if defined(RAFT_EXPLICIT_INSTANTIATE_ONLY)
+
+namespace raft::spatial::knn::detail {
+// Seen only when RAFT_EXPLICIT_INSTANTIATE_ONLY is defined; RAFT_EXPLICIT is from raft_explicit.hpp.
+template <typename value_idx,
+          typename value_t,
+          typename value_int = std::uint32_t,
+          int dims           = 2,
+          typename dist_func>
+void rbc_low_dim_pass_one(raft::device_resources const& handle,
+                          const BallCoverIndex<value_idx, value_t, value_int>& index,
+                          const value_t* query,
+                          const value_int n_query_rows,
+                          value_int k,
+                          const value_idx* R_knn_inds,
+                          const value_t* R_knn_dists,
+                          dist_func& dfunc,
+                          value_idx* inds,
+                          value_t* dists,
+                          float weight,
+                          value_int* dists_counter) RAFT_EXPLICIT;
+// Second-pass counterpart, also guarded by RAFT_EXPLICIT_INSTANTIATE_ONLY; ends in post_dists_counter.
+template <typename value_idx,
+          typename value_t,
+          typename value_int = std::uint32_t,
+          int dims           = 2,
+          typename dist_func>
+void rbc_low_dim_pass_two(raft::device_resources const& handle,
+                          const BallCoverIndex<value_idx, value_t, value_int>& index,
+                          const value_t* query,
+                          const value_int n_query_rows,
+                          value_int k,
+                          const value_idx* R_knn_inds,
+                          const value_t* R_knn_dists,
+                          dist_func& dfunc,
+                          value_idx* inds,
+                          value_t* dists,
+                          float weight,
+                          value_int* post_dists_counter) RAFT_EXPLICIT;
+
+};      // namespace raft::spatial::knn::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+// `extern template` declaration of rbc_low_dim_pass_one; dist_func is Mdist_func<Mvalue_t, Mvalue_int>.
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  extern template void                                                                       \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// `extern template` declaration of rbc_low_dim_pass_two; dist_func is Mdist_func<Mvalue_t, Mvalue_int>.
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  extern template void                                                                       \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Pass one: int64 indices, float values, uint32 counters, dims 2 and 3, for each distance functor.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc);
+// Pass two: the same six combinations of dims and distance functor.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc);
+// The helper macros are only needed for the list above; remove them again.
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh
new file mode 100644
index 0000000000..e0e7d716ee
--- /dev/null
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh
@@ -0,0 +1,780 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "common.cuh"
+
+#include "../../ball_cover_types.hpp"
+#include "../haversine_distance.cuh"
+#include "registers_types.cuh"  // DistFunc
+
+#include <cstdint>
+#include <limits.h>
+
+#include <raft/neighbors/detail/faiss_select/key_value_block_select.cuh>
+#include <raft/util/cuda_utils.cuh>
+
+#include <thrust/fill.h>
+
+namespace raft {
+namespace spatial {
+namespace knn {
+namespace detail {
+
+/**
+ * To find exact neighbors, we perform a post-processing stage that filters out those points which
+ * might have neighbors outside of their k closest landmarks. This is usually a very small portion
+ * of the total points. Each thread block handles one row of X, selected by blockIdx.x.
+ * @tparam value_idx element type of R_knn_inds (landmark indices)
+ * @tparam value_t element type of the points, distances and radii
+ * @tparam value_int integer type of n_cols, bitset_size, k and the loop counters
+ * @tparam col_q length of the per-thread copy of the row; the copy loop writes n_cols entries
+ * @tparam tpb stride of the per-thread loops (presumably the launch block size; confirm at caller)
+ * @tparam distance_func callable invoked as dfunc(a, b, n_cols); its result is stored in a value_t
+ * @param X points to filter; row blockIdx.x is read from X[n_cols * blockIdx.x + j]
+ * @param n_cols number of values per row of X and per landmark
+ * @param R_knn_inds k landmark indices per row of X; their bits are cleared before the distance test
+ * @param R_knn_dists k distances per row of X; entry k - 1 of the row is used as closest_R_dist
+ * @param R_radius one value per landmark, added to closest_R_dist in the discard test
+ * @param landmarks landmark coordinates; landmark l starts at landmarks + n_cols * l
+ * @param n_landmarks number of landmarks tested per row
+ * @param bitset_size number of 32-bit words in each row's bitset
+ * @param k number of entries per row in R_knn_inds and R_knn_dists
+ * @param dfunc distance functor applied to the current row and each landmark
+ * @param output resulting bitsets; row blockIdx.x is written at output[blockIdx.x * bitset_size + l]
+ * @param weight factor applied to (closest_R_dist + R_radius[l]) in the discard test
+ */
+template <typename value_idx,
+          typename value_t,
+          typename value_int = std::uint32_t,
+          int col_q          = 2,
+          int tpb            = 32,
+          typename distance_func>
+__global__ void perform_post_filter_registers(const value_t* X,
+                                              value_int n_cols,
+                                              const value_idx* R_knn_inds,
+                                              const value_t* R_knn_dists,
+                                              const value_t* R_radius,
+                                              const value_t* landmarks,
+                                              int n_landmarks,
+                                              value_int bitset_size,
+                                              value_int k,
+                                              distance_func dfunc,
+                                              std::uint32_t* output,
+                                              float weight = 1.0)
+{
+  // dynamic shared memory for this row's bitset (bitset_size words; n_landmarks / 32 ints)
+  extern __shared__ std::uint32_t shared_mem[];
+
+  // Start with all bits on
+  for (value_int i = threadIdx.x; i < bitset_size; i += tpb) {
+    shared_mem[i] = 0xffffffff;
+  }
+
+  __syncthreads();
+
+  // TODO: Would it be faster to use L1 for this?
+  value_t local_x_ptr[col_q];
+  for (value_int j = 0; j < n_cols; ++j) {
+    local_x_ptr[j] = X[n_cols * blockIdx.x + j];
+  }
+
+  value_t closest_R_dist = R_knn_dists[blockIdx.x * k + (k - 1)];
+
+  // zero out bits for closest k landmarks
+  for (value_int j = threadIdx.x; j < k; j += tpb) {
+    _zero_bit(shared_mem, (std::uint32_t)R_knn_inds[blockIdx.x * k + j]);
+  }
+
+  __syncthreads();
+
+  // Discard any landmark r where p(q, r) > weight * (closest_R_dist + radius(r)) or
+  // p(q, r) > 3 * closest_R_dist. Here q is the current point, p is dfunc, and
+  // closest_R_dist is the last (index k - 1) entry of the current row of R_knn_dists.
+  // Threads of the block stride over the landmarks by tpb.
+  for (value_int l = threadIdx.x; l < n_landmarks; l += tpb) {
+    // compute p(q, r)
+    value_t dist = dfunc(local_x_ptr, landmarks + (n_cols * l), n_cols);
+    if (dist > weight * (closest_R_dist + R_radius[l]) || dist > 3 * closest_R_dist) {
+      _zero_bit(shared_mem, l);
+    }
+  }
+
+  __syncthreads();
+
+  // Output bitset: copy the shared-memory words to this row's slice of output
+  for (value_int l = threadIdx.x; l < bitset_size; l += tpb) {
+    output[blockIdx.x * bitset_size + l] = shared_mem[l];
+  }
+}
+
+/**
+ * Updates the k nearest neighbors of each row of X (one block per row) from landmarks set in bitset.
+ * @tparam value_idx type of the point indices (knn_inds, R_1nn_inds) and of the R_indptr offsets
+ * @tparam value_t type of the point coordinates and distances
+ * @tparam value_int integer type of n_cols, bitset_size, n_landmarks, k and dist_counter
+ * @tparam bitset_type word type of bitset
+ * @tparam dist_func callable invoked as dfunc(a, b, n_cols); its result is stored in a value_t
+ * @tparam warp_q number of registers to use per warp
+ * @tparam thread_q number of registers to use within each thread
+ * @tparam tpb number of threads per block
+ * @tparam col_q length of the per-thread coordinate arrays; the copy loops write n_cols entries
+ * @param X_index points the candidates refer to; candidate c is read at X_index + n_cols * c
+ * @param X query points; row blockIdx.x is the point handled by this block
+ * @param n_cols number of values per row of X and X_index
+ * @param bitset bitset_size words per row of X; landmark r is visited when _get_val(row, r) is true
+ * @param bitset_size number of words in each row of bitset
+ * @param R_closest_landmark_dists indexed by the indices in knn_inds; seeds the heap's second key
+ * @param R_indptr offsets: landmark r owns entries [R_indptr[r], R_indptr[r + 1]) of R_1nn_*
+ * @param R_1nn_inds candidate point indices, grouped by landmark via R_indptr
+ * @param R_1nn_dists value paired with each candidate (presumably its landmark distance; confirm)
+ * @param knn_inds in/out, k per row: initial neighbor indices are read, final ones written back
+ * @param knn_dists in/out, k per row: initial neighbor distances are read, final ones written back
+ * @param n_landmarks number of landmarks to scan
+ * @param k number of neighbors per row
+ * @param dfunc distance functor applied to the current row and each surviving candidate
+ * @param dist_counter not referenced in this kernel body
+ */
+template <typename value_idx,
+          typename value_t,
+          typename value_int   = std::uint32_t,
+          typename bitset_type = std::uint32_t,
+          typename dist_func,
+          int warp_q   = 32,
+          int thread_q = 2,
+          int tpb      = 128,
+          int col_q    = 2>
+__global__ void compute_final_dists_registers(const value_t* X_index,
+                                              const value_t* X,
+                                              const value_int n_cols,
+                                              bitset_type* bitset,
+                                              value_int bitset_size,
+                                              const value_t* R_closest_landmark_dists,
+                                              const value_idx* R_indptr,
+                                              const value_idx* R_1nn_inds,
+                                              const value_t* R_1nn_dists,
+                                              value_idx* knn_inds,
+                                              value_t* knn_dists,
+                                              value_int n_landmarks,
+                                              value_int k,
+                                              dist_func dfunc,
+                                              value_int* dist_counter)
+{
+  static constexpr int kNumWarps = tpb / WarpSize;
+
+  __shared__ value_t shared_memK[kNumWarps * warp_q];
+  __shared__ KeyValuePair<value_t, value_idx> shared_memV[kNumWarps * warp_q];
+
+  const value_t* x_ptr = X + (n_cols * blockIdx.x);
+  value_t local_x_ptr[col_q];
+  for (value_int j = 0; j < n_cols; ++j) {
+    local_x_ptr[j] = x_ptr[j];
+  }
+
+  using namespace raft::neighbors::detail::faiss_select;
+  KeyValueBlockSelect<value_t, value_idx, false, Comparator<value_t>, warp_q, thread_q, tpb> heap(
+    std::numeric_limits<value_t>::max(),
+    std::numeric_limits<value_t>::max(),
+    -1,
+    shared_memK,
+    shared_memV,
+    k);
+
+  const value_int n_k = Pow2<WarpSize>::roundDown(k);
+  value_int i         = threadIdx.x;
+  for (; i < n_k; i += tpb) {
+    value_idx ind = knn_inds[blockIdx.x * k + i];
+    heap.add(knn_dists[blockIdx.x * k + i], R_closest_landmark_dists[ind], ind);
+  }
+
+  if (i < k) {
+    value_idx ind = knn_inds[blockIdx.x * k + i];
+    heap.addThreadQ(knn_dists[blockIdx.x * k + i], R_closest_landmark_dists[ind], ind);
+  }
+
+  heap.checkThreadQ();
+
+  for (value_int cur_R_ind = 0; cur_R_ind < n_landmarks; ++cur_R_ind) {
+    // if cur R overlaps cur point's closest R (its bit is set), it could be a candidate
+    if (_get_val(bitset + (blockIdx.x * bitset_size), cur_R_ind)) {
+      value_idx R_start_offset = R_indptr[cur_R_ind];
+      value_idx R_stop_offset  = R_indptr[cur_R_ind + 1];
+      value_idx R_size         = R_stop_offset - R_start_offset;
+
+      // Loop through R's neighborhood in parallel. Round R_size down to whole warps of threads so
+      // they can all be computing in parallel; the remainder is handled after the loop.
+
+      const value_int limit = Pow2<WarpSize>::roundDown(R_size);
+
+      i = threadIdx.x;
+      for (; i < limit; i += tpb) {
+        value_idx cur_candidate_ind = R_1nn_inds[R_start_offset + i];
+        value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
+
+        value_t z = heap.warpKTopRDist == 0.00 ? 0.0
+                                               : (abs(heap.warpKTop - heap.warpKTopRDist) *
+                                                    abs(heap.warpKTopRDist - cur_candidate_dist) -
+                                                  heap.warpKTop * cur_candidate_dist) /
+                                                   heap.warpKTopRDist;
+        z         = isnan(z) || isinf(z) ? 0.0 : z;
+
+        // If the lower bound z could still be within the closest k, compute the distance; add it
+        value_t dist = std::numeric_limits<value_t>::max();
+        if (z <= heap.warpKTop) {
+          const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
+          value_t local_y_ptr[col_q];
+          for (value_int j = 0; j < n_cols; ++j) {
+            local_y_ptr[j] = y_ptr[j];
+          }
+
+          dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
+        }
+
+        heap.add(dist, cur_candidate_dist, cur_candidate_ind);
+      }
+
+      // second round guarantees to be only a single warp.
+      if (i < R_size) {
+        value_idx cur_candidate_ind = R_1nn_inds[R_start_offset + i];
+        value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
+
+        value_t z = heap.warpKTopRDist == 0.00 ? 0.0
+                                               : (abs(heap.warpKTop - heap.warpKTopRDist) *
+                                                    abs(heap.warpKTopRDist - cur_candidate_dist) -
+                                                  heap.warpKTop * cur_candidate_dist) /
+                                                   heap.warpKTopRDist;
+
+        z = isnan(z) || isinf(z) ? 0.0 : z;
+
+        // If the lower bound z could still be within the closest k, compute the distance; add it
+        value_t dist = std::numeric_limits<value_t>::max();
+        if (z <= heap.warpKTop) {
+          const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
+          value_t local_y_ptr[col_q];
+          for (value_int j = 0; j < n_cols; ++j) {
+            local_y_ptr[j] = y_ptr[j];
+          }
+          dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
+        }
+        heap.addThreadQ(dist, cur_candidate_dist, cur_candidate_ind);
+      }
+      heap.checkThreadQ();
+    }
+  }
+
+  heap.reduce();
+
+  for (value_int i = threadIdx.x; i < k; i += tpb) {
+    knn_dists[blockIdx.x * k + i] = shared_memK[i];
+    knn_inds[blockIdx.x * k + i]  = shared_memV[i].value;
+  }
+}
+
+/**
+ * Random ball cover kernel for low-dimensional points kept in registers (n_cols of 2 or 3)
+ * @tparam value_idx type of the index arrays
+ * @tparam value_t type of coordinates and distances
+ * @tparam warp_q warp queue length for KeyValueBlockSelect; also sizes the shared buffers
+ * @tparam thread_q thread queue length for KeyValueBlockSelect
+ * @tparam tpb threads per block; used as the stride of every per-thread loop
+ * @tparam col_q capacity of the per-thread coordinate arrays; n_cols must not exceed it
+ * @tparam value_int integer type for sizes and loop counters
+ * @tparam distance_func callable invoked as dfunc(x, y, n_cols)
+ * @param X_index,X candidate points and query points (n_cols consecutive values per point)
+ * @param R_knn_inds,R_knn_dists k landmark indices / distances per query (row blockIdx.x)
+ * @param k number of neighbors written per query row to out_inds / out_dists
+ * @param R_indptr offsets delimiting each landmark's slice of R_1nn_cols / R_1nn_dists
+ * @param R_1nn_cols,R_1nn_dists per slice element: point index and its landmark distance
+ * @param R_radius,weight a landmark is skipped if its dist > weight * (min_R_dist + radius)
+ */
+template <typename value_idx = std::int64_t,
+          typename value_t,
+          int warp_q         = 32,
+          int thread_q       = 2,
+          int tpb            = 128,
+          int col_q          = 2,
+          typename value_int = std::uint32_t,
+          typename distance_func>
+__global__ void block_rbc_kernel_registers(const value_t* X_index,
+                                           const value_t* X,
+                                           value_int n_cols,  // n_cols should be 2 or 3 dims
+                                           const value_idx* R_knn_inds,
+                                           const value_t* R_knn_dists,
+                                           value_int m,
+                                           value_int k,
+                                           const value_idx* R_indptr,
+                                           const value_idx* R_1nn_cols,
+                                           const value_t* R_1nn_dists,
+                                           value_idx* out_inds,
+                                           value_t* out_dists,
+                                           value_int* dist_counter,
+                                           const value_t* R_radius,
+                                           distance_func dfunc,
+                                           float weight = 1.0)
+{
+  static constexpr value_int kNumWarps = tpb / WarpSize;
+
+  __shared__ value_t shared_memK[kNumWarps * warp_q];
+  __shared__ KeyValuePair<value_t, value_idx> shared_memV[kNumWarps * warp_q];
+
+  // TODO: Separate kernels for different widths:
+  // 1. Very small (between 3 and 32) just use registers for columns of "blockIdx.x"
+  // 2. Can fit comfortably in shared memory (32 to a few thousand?)
+  // 3. Load each time individually.
+  const value_t* x_ptr = X + (n_cols * blockIdx.x);
+
+  // Use registers only for 2d or 3d
+  value_t local_x_ptr[col_q];
+  for (value_int i = 0; i < n_cols; ++i) {
+    local_x_ptr[i] = x_ptr[i];
+  }
+
+  // All threads of the block step through one R at a time (loops below stride by tpb)
+  using namespace raft::neighbors::detail::faiss_select;
+  KeyValueBlockSelect<value_t, value_idx, false, Comparator<value_t>, warp_q, thread_q, tpb> heap(
+    std::numeric_limits<value_t>::max(),
+    std::numeric_limits<value_t>::max(),
+    -1,
+    shared_memK,
+    shared_memV,
+    k);
+
+  value_t min_R_dist         = R_knn_dists[blockIdx.x * k + (k - 1)];
+  value_int n_dists_computed = 0;  // incremented below but never written to dist_counter
+
+  /**
+   * First add distances for k closest neighbors of R
+   * to the heap
+   */
+  // Start iterating through elements of each set from closest R elements,
+  // determining if the distance could even potentially be in the heap.
+  for (value_int cur_k = 0; cur_k < k; ++cur_k) {
+    // index and distance to current blockIdx.x's closest landmark
+    value_t cur_R_dist  = R_knn_dists[blockIdx.x * k + cur_k];
+    value_idx cur_R_ind = R_knn_inds[blockIdx.x * k + cur_k];
+
+    // Equation (2) in Cayton's paper- prune out R's which are > 3 * p(q, r_q)
+    if (cur_R_dist > weight * (min_R_dist + R_radius[cur_R_ind])) continue;
+    if (cur_R_dist > 3 * min_R_dist) return;
+
+    // The whole block iterates through the elements in the current R
+    value_idx R_start_offset = R_indptr[cur_R_ind];
+    value_idx R_stop_offset  = R_indptr[cur_R_ind + 1];
+
+    value_idx R_size = R_stop_offset - R_start_offset;
+
+    value_int limit = Pow2<WarpSize>::roundDown(R_size);
+    value_int i     = threadIdx.x;
+    for (; i < limit; i += tpb) {
+      // Index and distance of current candidate's nearest landmark
+      value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i];
+      value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
+
+      // Take 2 landmarks l_1 and l_2 where l_1 is the furthest point in the heap
+      // and l_2 is the current landmark R. s is the current data point and
+      // t is the new candidate data point. We know that:
+      // d(s, t) cannot possibly be any smaller than | d(s, l_1) - d(l_1, l_2) | * | d(l_1, l_2) -
+      // d(l_2, t) | - d(s, l_1) * d(l_2, t)
+
+      // Therefore, if d(s, t) >= d(s, l_1) from the computation above, we know that the distance to
+      // the candidate point cannot possibly be in the nearest neighbors. However, if d(s, t) < d(s,
+      // l_1) then we should compute the distance because it's possible it could be smaller.
+      //
+      value_t z = heap.warpKTopRDist == 0.00 ? 0.0
+                                             : (abs(heap.warpKTop - heap.warpKTopRDist) *
+                                                  abs(heap.warpKTopRDist - cur_candidate_dist) -
+                                                heap.warpKTop * cur_candidate_dist) /
+                                                 heap.warpKTopRDist;
+
+      z            = isnan(z) || isinf(z) ? 0.0 : z;
+      value_t dist = std::numeric_limits<value_t>::max();
+
+      if (z <= heap.warpKTop) {
+        const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
+        value_t local_y_ptr[col_q];
+        for (value_int j = 0; j < n_cols; ++j) {
+          local_y_ptr[j] = y_ptr[j];
+        }
+        dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
+        ++n_dists_computed;
+      }
+
+      heap.add(dist, cur_candidate_dist, cur_candidate_ind);
+    }
+
+    if (i < R_size) {
+      value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i];
+      value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
+      value_t z                   = heap.warpKTopRDist == 0.0 ? 0.0
+                                                              : (abs(heap.warpKTop - heap.warpKTopRDist) *
+                                                 abs(heap.warpKTopRDist - cur_candidate_dist) -
+                                               heap.warpKTop * cur_candidate_dist) /
+                                                heap.warpKTopRDist;
+
+      z            = isnan(z) || isinf(z) ? 0.0 : z;
+      value_t dist = std::numeric_limits<value_t>::max();
+
+      if (z <= heap.warpKTop) {
+        const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
+        value_t local_y_ptr[col_q];
+        for (value_int j = 0; j < n_cols; ++j) {
+          local_y_ptr[j] = y_ptr[j];
+        }
+        dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
+        ++n_dists_computed;
+      }
+
+      heap.addThreadQ(dist, cur_candidate_dist, cur_candidate_ind);
+    }
+
+    heap.checkThreadQ();
+  }
+
+  heap.reduce();
+
+  for (int i = threadIdx.x; i < k; i += tpb) {
+    out_dists[blockIdx.x * k + i] = shared_memK[i];
+    out_inds[blockIdx.x * k + i]  = shared_memV[i].value;
+  }
+}
+
+// First pass for low-dimensional data: launches block_rbc_kernel_registers with one thread
+// block per query row, choosing the instantiation with the smallest warp_q that covers k.
+// No kernel is launched when k > 1024. Every branch must pass `dims` as col_q, because the
+// kernel sizes its per-thread coordinate arrays with col_q and copies n_cols values into them.
+template <typename value_idx,
+          typename value_t,
+          typename value_int = std::uint32_t,
+          int dims           = 2,  // forwarded to the kernel as col_q
+          typename dist_func>
+void rbc_low_dim_pass_one(raft::device_resources const& handle,
+                          const BallCoverIndex<value_idx, value_t, value_int>& index,
+                          const value_t* query,
+                          const value_int n_query_rows,  // grid size: one block per query row
+                          value_int k,                   // neighbors per query; must be <= 1024
+                          const value_idx* R_knn_inds,
+                          const value_t* R_knn_dists,
+                          dist_func& dfunc,
+                          value_idx* inds,
+                          value_t* dists,
+                          float weight,
+                          value_int* dists_counter)
+{
+  if (k <= 32)
+    block_rbc_kernel_registers<value_idx, value_t, 32, 2, 128, dims, value_int>
+      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
+                                                      query,
+                                                      index.n,
+                                                      R_knn_inds,
+                                                      R_knn_dists,
+                                                      index.m,
+                                                      k,
+                                                      index.get_R_indptr().data_handle(),
+                                                      index.get_R_1nn_cols().data_handle(),
+                                                      index.get_R_1nn_dists().data_handle(),
+                                                      inds,
+                                                      dists,
+                                                      dists_counter,
+                                                      index.get_R_radius().data_handle(),
+                                                      dfunc,
+                                                      weight);
+  else if (k <= 64)
+    block_rbc_kernel_registers<value_idx, value_t, 64, 3, 128, dims, value_int>
+      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
+                                                      query,
+                                                      index.n,
+                                                      R_knn_inds,
+                                                      R_knn_dists,
+                                                      index.m,
+                                                      k,
+                                                      index.get_R_indptr().data_handle(),
+                                                      index.get_R_1nn_cols().data_handle(),
+                                                      index.get_R_1nn_dists().data_handle(),
+                                                      inds,
+                                                      dists,
+                                                      dists_counter,
+                                                      index.get_R_radius().data_handle(),
+                                                      dfunc,
+                                                      weight);
+  else if (k <= 128)
+    block_rbc_kernel_registers<value_idx, value_t, 128, 3, 128, dims, value_int>
+      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
+                                                      query,
+                                                      index.n,
+                                                      R_knn_inds,
+                                                      R_knn_dists,
+                                                      index.m,
+                                                      k,
+                                                      index.get_R_indptr().data_handle(),
+                                                      index.get_R_1nn_cols().data_handle(),
+                                                      index.get_R_1nn_dists().data_handle(),
+                                                      inds,
+                                                      dists,
+                                                      dists_counter,
+                                                      index.get_R_radius().data_handle(),
+                                                      dfunc,
+                                                      weight);
+  else if (k <= 256)
+    block_rbc_kernel_registers<value_idx, value_t, 256, 4, 128, dims, value_int>
+      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
+                                                      query,
+                                                      index.n,
+                                                      R_knn_inds,
+                                                      R_knn_dists,
+                                                      index.m,
+                                                      k,
+                                                      index.get_R_indptr().data_handle(),
+                                                      index.get_R_1nn_cols().data_handle(),
+                                                      index.get_R_1nn_dists().data_handle(),
+                                                      inds,
+                                                      dists,
+                                                      dists_counter,
+                                                      index.get_R_radius().data_handle(),
+                                                      dfunc,
+                                                      weight);
+  else if (k <= 512)
+    block_rbc_kernel_registers<value_idx, value_t, 512, 8, 64, dims, value_int>
+      <<<n_query_rows, 64, 0, handle.get_stream()>>>(index.get_X().data_handle(),
+                                                     query,
+                                                     index.n,
+                                                     R_knn_inds,
+                                                     R_knn_dists,
+                                                     index.m,
+                                                     k,
+                                                     index.get_R_indptr().data_handle(),
+                                                     index.get_R_1nn_cols().data_handle(),
+                                                     index.get_R_1nn_dists().data_handle(),
+                                                     inds,
+                                                     dists,
+                                                     dists_counter,
+                                                     index.get_R_radius().data_handle(),
+                                                     dfunc,
+                                                     weight);
+  else if (k <= 1024)
+    block_rbc_kernel_registers<value_idx, value_t, 1024, 8, 64, dims, value_int>
+      <<<n_query_rows, 64, 0, handle.get_stream()>>>(index.get_X().data_handle(),
+                                                     query,
+                                                     index.n,
+                                                     R_knn_inds,
+                                                     R_knn_dists,
+                                                     index.m,
+                                                     k,
+                                                     index.get_R_indptr().data_handle(),
+                                                     index.get_R_1nn_cols().data_handle(),
+                                                     index.get_R_1nn_dists().data_handle(),
+                                                     inds,
+                                                     dists,
+                                                     dists_counter,
+                                                     index.get_R_radius().data_handle(),
+                                                     dfunc,
+                                                     weight);
+}
+
+template <typename value_idx,
+          typename value_t,
+          typename value_int = std::uint32_t,
+          int dims           = 2,  // forwarded as col_q to both kernels launched below
+          typename dist_func>
+void rbc_low_dim_pass_two(raft::device_resources const& handle,
+                          const BallCoverIndex<value_idx, value_t, value_int>& index,
+                          const value_t* query,
+                          const value_int n_query_rows,
+                          value_int k,
+                          const value_idx* R_knn_inds,
+                          const value_t* R_knn_dists,
+                          dist_func& dfunc,
+                          value_idx* inds,
+                          value_t* dists,
+                          float weight,
+                          value_int* post_dists_counter)
+{  // Pass two: build a per-query landmark bitset, then update inds/dists in place with it.
+  const value_int bitset_size = ceil(index.n_landmarks / 32.0);  // 32-bit words per query row
+  // Device buffer holding one bitset row per query, zero-filled before use.
+  rmm::device_uvector<std::uint32_t> bitset(bitset_size * n_query_rows, handle.get_stream());
+  thrust::fill(handle.get_thrust_policy(), bitset.data(), bitset.data() + bitset.size(), 0);
+  // Filter kernel: one block per query; dynamic shared memory sized for one bitset row.
+  perform_post_filter_registers<value_idx, value_t, value_int, dims, 128>
+    <<<n_query_rows, 128, bitset_size * sizeof(std::uint32_t), handle.get_stream()>>>(
+      query,
+      index.n,
+      R_knn_inds,
+      R_knn_dists,
+      index.get_R_radius().data_handle(),
+      index.get_R().data_handle(),
+      index.n_landmarks,
+      bitset_size,
+      k,
+      dfunc,
+      bitset.data(),
+      weight);
+  // Refinement kernel: smallest instantiation with warp_q >= k; nothing runs for k > 1024.
+  if (k <= 32)
+    compute_final_dists_registers<value_idx,
+                                  value_t,
+                                  value_int,
+                                  std::uint32_t,
+                                  dist_func,
+                                  32,
+                                  2,
+                                  128,
+                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
+      index.get_X().data_handle(),
+      query,
+      index.n,
+      bitset.data(),
+      bitset_size,
+      index.get_R_closest_landmark_dists().data_handle(),
+      index.get_R_indptr().data_handle(),
+      index.get_R_1nn_cols().data_handle(),
+      index.get_R_1nn_dists().data_handle(),
+      inds,
+      dists,
+      index.n_landmarks,
+      k,
+      dfunc,
+      post_dists_counter);
+  else if (k <= 64)
+    compute_final_dists_registers<value_idx,
+                                  value_t,
+                                  value_int,
+                                  std::uint32_t,
+                                  dist_func,
+                                  64,
+                                  3,
+                                  128,
+                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
+      index.get_X().data_handle(),
+      query,
+      index.n,
+      bitset.data(),
+      bitset_size,
+      index.get_R_closest_landmark_dists().data_handle(),
+      index.get_R_indptr().data_handle(),
+      index.get_R_1nn_cols().data_handle(),
+      index.get_R_1nn_dists().data_handle(),
+      inds,
+      dists,
+      index.n_landmarks,
+      k,
+      dfunc,
+      post_dists_counter);
+  else if (k <= 128)
+    compute_final_dists_registers<value_idx,
+                                  value_t,
+                                  value_int,
+                                  std::uint32_t,
+                                  dist_func,
+                                  128,
+                                  3,
+                                  128,
+                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
+      index.get_X().data_handle(),
+      query,
+      index.n,
+      bitset.data(),
+      bitset_size,
+      index.get_R_closest_landmark_dists().data_handle(),
+      index.get_R_indptr().data_handle(),
+      index.get_R_1nn_cols().data_handle(),
+      index.get_R_1nn_dists().data_handle(),
+      inds,
+      dists,
+      index.n_landmarks,
+      k,
+      dfunc,
+      post_dists_counter);
+  else if (k <= 256)
+    compute_final_dists_registers<value_idx,
+                                  value_t,
+                                  value_int,
+                                  std::uint32_t,
+                                  dist_func,
+                                  256,
+                                  4,
+                                  128,
+                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
+      index.get_X().data_handle(),
+      query,
+      index.n,
+      bitset.data(),
+      bitset_size,
+      index.get_R_closest_landmark_dists().data_handle(),
+      index.get_R_indptr().data_handle(),
+      index.get_R_1nn_cols().data_handle(),
+      index.get_R_1nn_dists().data_handle(),
+      inds,
+      dists,
+      index.n_landmarks,
+      k,
+      dfunc,
+      post_dists_counter);
+  else if (k <= 512)
+    compute_final_dists_registers<value_idx,
+                                  value_t,
+                                  value_int,
+                                  std::uint32_t,
+                                  dist_func,
+                                  512,
+                                  8,
+                                  64,
+                                  dims><<<n_query_rows, 64, 0, handle.get_stream()>>>(
+      index.get_X().data_handle(),
+      query,
+      index.n,
+      bitset.data(),
+      bitset_size,
+      index.get_R_closest_landmark_dists().data_handle(),
+      index.get_R_indptr().data_handle(),
+      index.get_R_1nn_cols().data_handle(),
+      index.get_R_1nn_dists().data_handle(),
+      inds,
+      dists,
+      index.n_landmarks,
+      k,
+      dfunc,
+      post_dists_counter);
+  else if (k <= 1024)
+    compute_final_dists_registers<value_idx,
+                                  value_t,
+                                  value_int,
+                                  std::uint32_t,
+                                  dist_func,
+                                  1024,
+                                  8,
+                                  64,
+                                  dims><<<n_query_rows, 64, 0, handle.get_stream()>>>(
+      index.get_X().data_handle(),
+      query,
+      index.n,
+      bitset.data(),
+      bitset_size,
+      index.get_R_closest_landmark_dists().data_handle(),
+      index.get_R_indptr().data_handle(),
+      index.get_R_1nn_cols().data_handle(),
+      index.get_R_1nn_dists().data_handle(),
+      inds,
+      dists,
+      index.n_landmarks,
+      k,
+      dfunc,
+      post_dists_counter);
+}
+
+};  // namespace detail
+};  // namespace knn
+};  // namespace spatial
+};  // namespace raft
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers.cuh
index f665368c41..8bd57b47cc 100644
--- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers.cuh
@@ -13,767 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include "common.cuh"
-
-#include "../../ball_cover_types.hpp"
-#include "../haversine_distance.cuh"
-
-#include <cstdint>
-#include <limits.h>
-
-#include <raft/neighbors/detail/faiss_select/key_value_block_select.cuh>
-#include <raft/util/cuda_utils.cuh>
-
-#include <thrust/fill.h>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-/**
- * To find exact neighbors, we perform a post-processing stage
- * that filters out those points which might have neighbors outside
- * of their k closest landmarks. This is usually a very small portion
- * of the total points.
- * @tparam value_idx
- * @tparam value_t
- * @tparam value_int
- * @tparam tpb
- * @param X
- * @param n_cols
- * @param R_knn_inds
- * @param R_knn_dists
- * @param R_radius
- * @param landmarks
- * @param n_landmarks
- * @param bitset_size
- * @param k
- * @param output
- * @param weight
- */
-template <typename value_idx,
-          typename value_t,
-          typename value_int = std::uint32_t,
-          int col_q          = 2,
-          int tpb            = 32,
-          typename distance_func>
-__global__ void perform_post_filter_registers(const value_t* X,
-                                              value_int n_cols,
-                                              const value_idx* R_knn_inds,
-                                              const value_t* R_knn_dists,
-                                              const value_t* R_radius,
-                                              const value_t* landmarks,
-                                              int n_landmarks,
-                                              value_int bitset_size,
-                                              value_int k,
-                                              distance_func dfunc,
-                                              std::uint32_t* output,
-                                              float weight = 1.0)
-{
-  // allocate array of size n_landmarks / 32 ints
-  extern __shared__ std::uint32_t shared_mem[];
-
-  // Start with all bits on
-  for (value_int i = threadIdx.x; i < bitset_size; i += tpb) {
-    shared_mem[i] = 0xffffffff;
-  }
-
-  __syncthreads();
-
-  // TODO: Would it be faster to use L1 for this?
-  value_t local_x_ptr[col_q];
-  for (value_int j = 0; j < n_cols; ++j) {
-    local_x_ptr[j] = X[n_cols * blockIdx.x + j];
-  }
-
-  value_t closest_R_dist = R_knn_dists[blockIdx.x * k + (k - 1)];
-
-  // zero out bits for closest k landmarks
-  for (value_int j = threadIdx.x; j < k; j += tpb) {
-    _zero_bit(shared_mem, (std::uint32_t)R_knn_inds[blockIdx.x * k + j]);
-  }
-
-  __syncthreads();
-
-  // Discard any landmarks where p(q, r) > p(q, r_q) + radius(r)
-  // That is, the distance between the current point and the current
-  // landmark is > the distance between the current point and
-  // its closest landmark + the radius of the current landmark.
-  for (value_int l = threadIdx.x; l < n_landmarks; l += tpb) {
-    // compute p(q, r)
-    value_t dist = dfunc(local_x_ptr, landmarks + (n_cols * l), n_cols);
-    if (dist > weight * (closest_R_dist + R_radius[l]) || dist > 3 * closest_R_dist) {
-      _zero_bit(shared_mem, l);
-    }
-  }
-
-  __syncthreads();
-
-  /**
-   * Output bitset
-   */
-  for (value_int l = threadIdx.x; l < bitset_size; l += tpb) {
-    output[blockIdx.x * bitset_size + l] = shared_mem[l];
-  }
-}
-
-/**
- * @tparam value_idx
- * @tparam value_t
- * @tparam value_int
- * @tparam bitset_type
- * @tparam warp_q number of registers to use per warp
- * @tparam thread_q number of registers to use within each thread
- * @tparam tpb number of threads per block
- * @param X
- * @param n_cols
- * @param bitset
- * @param bitset_size
- * @param R_knn_dists
- * @param R_indptr
- * @param R_1nn_inds
- * @param R_1nn_dists
- * @param knn_inds
- * @param knn_dists
- * @param n_landmarks
- * @param k
- * @param dist_counter
- */
-template <typename value_idx,
-          typename value_t,
-          typename value_int   = std::uint32_t,
-          typename bitset_type = std::uint32_t,
-          typename dist_func,
-          int warp_q   = 32,
-          int thread_q = 2,
-          int tpb      = 128,
-          int col_q    = 2>
-__global__ void compute_final_dists_registers(const value_t* X_index,
-                                              const value_t* X,
-                                              const value_int n_cols,
-                                              bitset_type* bitset,
-                                              value_int bitset_size,
-                                              const value_t* R_closest_landmark_dists,
-                                              const value_idx* R_indptr,
-                                              const value_idx* R_1nn_inds,
-                                              const value_t* R_1nn_dists,
-                                              value_idx* knn_inds,
-                                              value_t* knn_dists,
-                                              value_int n_landmarks,
-                                              value_int k,
-                                              dist_func dfunc,
-                                              value_int* dist_counter)
-{
-  static constexpr int kNumWarps = tpb / WarpSize;
-
-  __shared__ value_t shared_memK[kNumWarps * warp_q];
-  __shared__ KeyValuePair<value_t, value_idx> shared_memV[kNumWarps * warp_q];
-
-  const value_t* x_ptr = X + (n_cols * blockIdx.x);
-  value_t local_x_ptr[col_q];
-  for (value_int j = 0; j < n_cols; ++j) {
-    local_x_ptr[j] = x_ptr[j];
-  }
-
-  using namespace raft::neighbors::detail::faiss_select;
-  KeyValueBlockSelect<value_t, value_idx, false, Comparator<value_t>, warp_q, thread_q, tpb> heap(
-    std::numeric_limits<value_t>::max(),
-    std::numeric_limits<value_t>::max(),
-    -1,
-    shared_memK,
-    shared_memV,
-    k);
-
-  const value_int n_k = Pow2<WarpSize>::roundDown(k);
-  value_int i         = threadIdx.x;
-  for (; i < n_k; i += tpb) {
-    value_idx ind = knn_inds[blockIdx.x * k + i];
-    heap.add(knn_dists[blockIdx.x * k + i], R_closest_landmark_dists[ind], ind);
-  }
-
-  if (i < k) {
-    value_idx ind = knn_inds[blockIdx.x * k + i];
-    heap.addThreadQ(knn_dists[blockIdx.x * k + i], R_closest_landmark_dists[ind], ind);
-  }
-
-  heap.checkThreadQ();
-
-  for (value_int cur_R_ind = 0; cur_R_ind < n_landmarks; ++cur_R_ind) {
-    // if cur R overlaps cur point's closest R, it could be a
-    // candidate
-    if (_get_val(bitset + (blockIdx.x * bitset_size), cur_R_ind)) {
-      value_idx R_start_offset = R_indptr[cur_R_ind];
-      value_idx R_stop_offset  = R_indptr[cur_R_ind + 1];
-      value_idx R_size         = R_stop_offset - R_start_offset;
-
-      // Loop through R's neighborhood in parallel
-
-      // Round R_size to the nearest warp threads so they can
-      // all be computing in parallel.
-
-      const value_int limit = Pow2<WarpSize>::roundDown(R_size);
-
-      i = threadIdx.x;
-      for (; i < limit; i += tpb) {
-        value_idx cur_candidate_ind = R_1nn_inds[R_start_offset + i];
-        value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
-
-        value_t z = heap.warpKTopRDist == 0.00 ? 0.0
-                                               : (abs(heap.warpKTop - heap.warpKTopRDist) *
-                                                    abs(heap.warpKTopRDist - cur_candidate_dist) -
-                                                  heap.warpKTop * cur_candidate_dist) /
-                                                   heap.warpKTopRDist;
-        z         = isnan(z) || isinf(z) ? 0.0 : z;
-
-        // If lower bound on distance could possibly be in
-        // the closest k neighbors, compute it and add to k-select
-        value_t dist = std::numeric_limits<value_t>::max();
-        if (z <= heap.warpKTop) {
-          const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
-          value_t local_y_ptr[col_q];
-          for (value_int j = 0; j < n_cols; ++j) {
-            local_y_ptr[j] = y_ptr[j];
-          }
-
-          dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
-        }
-
-        heap.add(dist, cur_candidate_dist, cur_candidate_ind);
-      }
-
-      // second round guarantees to be only a single warp.
-      if (i < R_size) {
-        value_idx cur_candidate_ind = R_1nn_inds[R_start_offset + i];
-        value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
-
-        value_t z = heap.warpKTopRDist == 0.00 ? 0.0
-                                               : (abs(heap.warpKTop - heap.warpKTopRDist) *
-                                                    abs(heap.warpKTopRDist - cur_candidate_dist) -
-                                                  heap.warpKTop * cur_candidate_dist) /
-                                                   heap.warpKTopRDist;
-
-        z = isnan(z) || isinf(z) ? 0.0 : z;
-
-        // If lower bound on distance could possibly be in
-        // the closest k neighbors, compute it and add to k-select
-        value_t dist = std::numeric_limits<value_t>::max();
-        if (z <= heap.warpKTop) {
-          const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
-          value_t local_y_ptr[col_q];
-          for (value_int j = 0; j < n_cols; ++j) {
-            local_y_ptr[j] = y_ptr[j];
-          }
-          dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
-        }
-        heap.addThreadQ(dist, cur_candidate_dist, cur_candidate_ind);
-      }
-      heap.checkThreadQ();
-    }
-  }
-
-  heap.reduce();
-
-  for (value_int i = threadIdx.x; i < k; i += tpb) {
-    knn_dists[blockIdx.x * k + i] = shared_memK[i];
-    knn_inds[blockIdx.x * k + i]  = shared_memV[i].value;
-  }
-}
-
-/**
- * Random ball cover kernel for n_dims == 2
- * @tparam value_idx
- * @tparam value_t
- * @tparam warp_q
- * @tparam thread_q
- * @tparam tpb
- * @tparam value_idx
- * @tparam value_t
- * @param R_knn_inds
- * @param R_knn_dists
- * @param m
- * @param k
- * @param R_indptr
- * @param R_1nn_cols
- * @param R_1nn_dists
- */
-template <typename value_idx = std::int64_t,
-          typename value_t,
-          int warp_q         = 32,
-          int thread_q       = 2,
-          int tpb            = 128,
-          int col_q          = 2,
-          typename value_int = std::uint32_t,
-          typename distance_func>
-__global__ void block_rbc_kernel_registers(const value_t* X_index,
-                                           const value_t* X,
-                                           value_int n_cols,  // n_cols should be 2 or 3 dims
-                                           const value_idx* R_knn_inds,
-                                           const value_t* R_knn_dists,
-                                           value_int m,
-                                           value_int k,
-                                           const value_idx* R_indptr,
-                                           const value_idx* R_1nn_cols,
-                                           const value_t* R_1nn_dists,
-                                           value_idx* out_inds,
-                                           value_t* out_dists,
-                                           value_int* dist_counter,
-                                           const value_t* R_radius,
-                                           distance_func dfunc,
-                                           float weight = 1.0)
-{
-  static constexpr value_int kNumWarps = tpb / WarpSize;
-
-  __shared__ value_t shared_memK[kNumWarps * warp_q];
-  __shared__ KeyValuePair<value_t, value_idx> shared_memV[kNumWarps * warp_q];
-
-  // TODO: Separate kernels for different widths:
-  // 1. Very small (between 3 and 32) just use registers for columns of "blockIdx.x"
-  // 2. Can fit comfortably in shared memory (32 to a few thousand?)
-  // 3. Load each time individually.
-  const value_t* x_ptr = X + (n_cols * blockIdx.x);
-
-  // Use registers only for 2d or 3d
-  value_t local_x_ptr[col_q];
-  for (value_int i = 0; i < n_cols; ++i) {
-    local_x_ptr[i] = x_ptr[i];
-  }
-
-  // Each warp works on 1 R
-  using namespace raft::neighbors::detail::faiss_select;
-  KeyValueBlockSelect<value_t, value_idx, false, Comparator<value_t>, warp_q, thread_q, tpb> heap(
-    std::numeric_limits<value_t>::max(),
-    std::numeric_limits<value_t>::max(),
-    -1,
-    shared_memK,
-    shared_memV,
-    k);
-
-  value_t min_R_dist         = R_knn_dists[blockIdx.x * k + (k - 1)];
-  value_int n_dists_computed = 0;
-
-  /**
-   * First add distances for k closest neighbors of R
-   * to the heap
-   */
-  // Start iterating through elements of each set from closest R elements,
-  // determining if the distance could even potentially be in the heap.
-  for (value_int cur_k = 0; cur_k < k; ++cur_k) {
-    // index and distance to current blockIdx.x's closest landmark
-    value_t cur_R_dist  = R_knn_dists[blockIdx.x * k + cur_k];
-    value_idx cur_R_ind = R_knn_inds[blockIdx.x * k + cur_k];
-
-    // Equation (2) in Cayton's paper- prune out R's which are > 3 * p(q, r_q)
-    if (cur_R_dist > weight * (min_R_dist + R_radius[cur_R_ind])) continue;
-    if (cur_R_dist > 3 * min_R_dist) return;
-
-    // The whole warp should iterate through the elements in the current R
-    value_idx R_start_offset = R_indptr[cur_R_ind];
-    value_idx R_stop_offset  = R_indptr[cur_R_ind + 1];
-
-    value_idx R_size = R_stop_offset - R_start_offset;
-
-    value_int limit = Pow2<WarpSize>::roundDown(R_size);
-    value_int i     = threadIdx.x;
-    for (; i < limit; i += tpb) {
-      // Index and distance of current candidate's nearest landmark
-      value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i];
-      value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
-
-      // Take 2 landmarks l_1 and l_2 where l_1 is the furthest point in the heap
-      // and l_2 is the current landmark R. s is the current data point and
-      // t is the new candidate data point. We know that:
-      // d(s, t) cannot possibly be any smaller than | d(s, l_1) - d(l_1, l_2) | * | d(l_1, l_2) -
-      // d(l_2, t) | - d(s, l_1) * d(l_2, t)
-
-      // Therefore, if d(s, t) >= d(s, l_1) from the computation above, we know that the distance to
-      // the candidate point cannot possibly be in the nearest neighbors. However, if d(s, t) < d(s,
-      // l_1) then we should compute the distance because it's possible it could be smaller.
-      //
-      value_t z = heap.warpKTopRDist == 0.00 ? 0.0
-                                             : (abs(heap.warpKTop - heap.warpKTopRDist) *
-                                                  abs(heap.warpKTopRDist - cur_candidate_dist) -
-                                                heap.warpKTop * cur_candidate_dist) /
-                                                 heap.warpKTopRDist;
-
-      z            = isnan(z) || isinf(z) ? 0.0 : z;
-      value_t dist = std::numeric_limits<value_t>::max();
-
-      if (z <= heap.warpKTop) {
-        const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
-        value_t local_y_ptr[col_q];
-        for (value_int j = 0; j < n_cols; ++j) {
-          local_y_ptr[j] = y_ptr[j];
-        }
-        dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
-        ++n_dists_computed;
-      }
-
-      heap.add(dist, cur_candidate_dist, cur_candidate_ind);
-    }
-
-    if (i < R_size) {
-      value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i];
-      value_t cur_candidate_dist  = R_1nn_dists[R_start_offset + i];
-      value_t z                   = heap.warpKTopRDist == 0.0 ? 0.0
-                                                              : (abs(heap.warpKTop - heap.warpKTopRDist) *
-                                                 abs(heap.warpKTopRDist - cur_candidate_dist) -
-                                               heap.warpKTop * cur_candidate_dist) /
-                                                heap.warpKTopRDist;
-
-      z            = isnan(z) || isinf(z) ? 0.0 : z;
-      value_t dist = std::numeric_limits<value_t>::max();
-
-      if (z <= heap.warpKTop) {
-        const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind);
-        value_t local_y_ptr[col_q];
-        for (value_int j = 0; j < n_cols; ++j) {
-          local_y_ptr[j] = y_ptr[j];
-        }
-        dist = dfunc(local_x_ptr, local_y_ptr, n_cols);
-        ++n_dists_computed;
-      }
-
-      heap.addThreadQ(dist, cur_candidate_dist, cur_candidate_ind);
-    }
-
-    heap.checkThreadQ();
-  }
-
-  heap.reduce();
-
-  for (int i = threadIdx.x; i < k; i += tpb) {
-    out_dists[blockIdx.x * k + i] = shared_memK[i];
-    out_inds[blockIdx.x * k + i]  = shared_memV[i].value;
-  }
-}
-
-template <typename value_idx,
-          typename value_t,
-          typename value_int = std::uint32_t,
-          int dims           = 2,
-          typename dist_func>
-void rbc_low_dim_pass_one(raft::device_resources const& handle,
-                          const BallCoverIndex<value_idx, value_t, value_int>& index,
-                          const value_t* query,
-                          const value_int n_query_rows,
-                          value_int k,
-                          const value_idx* R_knn_inds,
-                          const value_t* R_knn_dists,
-                          dist_func& dfunc,
-                          value_idx* inds,
-                          value_t* dists,
-                          float weight,
-                          value_int* dists_counter)
-{
-  if (k <= 32)
-    block_rbc_kernel_registers<value_idx, value_t, 32, 2, 128, dims, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
-
-  else if (k <= 64)
-    block_rbc_kernel_registers<value_idx, value_t, 64, 3, 128, 2, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
-  else if (k <= 128)
-    block_rbc_kernel_registers<value_idx, value_t, 128, 3, 128, dims, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
-
-  else if (k <= 256)
-    block_rbc_kernel_registers<value_idx, value_t, 256, 4, 128, dims, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
-
-  else if (k <= 512)
-    block_rbc_kernel_registers<value_idx, value_t, 512, 8, 64, dims, value_int>
-      <<<n_query_rows, 64, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                     query,
-                                                     index.n,
-                                                     R_knn_inds,
-                                                     R_knn_dists,
-                                                     index.m,
-                                                     k,
-                                                     index.get_R_indptr().data_handle(),
-                                                     index.get_R_1nn_cols().data_handle(),
-                                                     index.get_R_1nn_dists().data_handle(),
-                                                     inds,
-                                                     dists,
-                                                     dists_counter,
-                                                     index.get_R_radius().data_handle(),
-                                                     dfunc,
-                                                     weight);
-
-  else if (k <= 1024)
-    block_rbc_kernel_registers<value_idx, value_t, 1024, 8, 64, dims, value_int>
-      <<<n_query_rows, 64, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                     query,
-                                                     index.n,
-                                                     R_knn_inds,
-                                                     R_knn_dists,
-                                                     index.m,
-                                                     k,
-                                                     index.get_R_indptr().data_handle(),
-                                                     index.get_R_1nn_cols().data_handle(),
-                                                     index.get_R_1nn_dists().data_handle(),
-                                                     inds,
-                                                     dists,
-                                                     dists_counter,
-                                                     index.get_R_radius().data_handle(),
-                                                     dfunc,
-                                                     weight);
-}
-
-template <typename value_idx,
-          typename value_t,
-          typename value_int = std::uint32_t,
-          int dims           = 2,
-          typename dist_func>
-void rbc_low_dim_pass_two(raft::device_resources const& handle,
-                          const BallCoverIndex<value_idx, value_t, value_int>& index,
-                          const value_t* query,
-                          const value_int n_query_rows,
-                          value_int k,
-                          const value_idx* R_knn_inds,
-                          const value_t* R_knn_dists,
-                          dist_func& dfunc,
-                          value_idx* inds,
-                          value_t* dists,
-                          float weight,
-                          value_int* post_dists_counter)
-{
-  const value_int bitset_size = ceil(index.n_landmarks / 32.0);
-
-  rmm::device_uvector<std::uint32_t> bitset(bitset_size * n_query_rows, handle.get_stream());
-  thrust::fill(handle.get_thrust_policy(), bitset.data(), bitset.data() + bitset.size(), 0);
-
-  perform_post_filter_registers<value_idx, value_t, value_int, dims, 128>
-    <<<n_query_rows, 128, bitset_size * sizeof(std::uint32_t), handle.get_stream()>>>(
-      query,
-      index.n,
-      R_knn_inds,
-      R_knn_dists,
-      index.get_R_radius().data_handle(),
-      index.get_R().data_handle(),
-      index.n_landmarks,
-      bitset_size,
-      k,
-      dfunc,
-      bitset.data(),
-      weight);
-
-  if (k <= 32)
-    compute_final_dists_registers<value_idx,
-                                  value_t,
-                                  value_int,
-                                  std::uint32_t,
-                                  dist_func,
-                                  32,
-                                  2,
-                                  128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
-  else if (k <= 64)
-    compute_final_dists_registers<value_idx,
-                                  value_t,
-                                  value_int,
-                                  std::uint32_t,
-                                  dist_func,
-                                  64,
-                                  3,
-                                  128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
-  else if (k <= 128)
-    compute_final_dists_registers<value_idx,
-                                  value_t,
-                                  value_int,
-                                  std::uint32_t,
-                                  dist_func,
-                                  128,
-                                  3,
-                                  128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
-  else if (k <= 256)
-    compute_final_dists_registers<value_idx,
-                                  value_t,
-                                  value_int,
-                                  std::uint32_t,
-                                  dist_func,
-                                  256,
-                                  4,
-                                  128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
-  else if (k <= 512)
-    compute_final_dists_registers<value_idx,
-                                  value_t,
-                                  value_int,
-                                  std::uint32_t,
-                                  dist_func,
-                                  512,
-                                  8,
-                                  64,
-                                  dims><<<n_query_rows, 64, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
-  else if (k <= 1024)
-    compute_final_dists_registers<value_idx,
-                                  value_t,
-                                  value_int,
-                                  std::uint32_t,
-                                  dist_func,
-                                  1024,
-                                  8,
-                                  64,
-                                  dims><<<n_query_rows, 64, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
-}
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "registers-inl.cuh"
+#endif
 
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
+#ifdef RAFT_COMPILED
+#include "registers-ext.cuh"
+#endif
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers_types.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers_types.cuh
new file mode 100644
index 0000000000..7f4268d2dc
--- /dev/null
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers_types.cuh
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "../haversine_distance.cuh"  // compute_haversine
+#include <cstdint>                    // uint32_t
+
+namespace raft {
+namespace spatial {
+namespace knn {
+namespace detail {
+
+template <typename value_t, typename value_int = std::uint32_t>
+struct DistFunc {  // Base distance functor; derived functors override operator().
+  virtual __device__ __host__ __forceinline__ value_t operator()(const value_t* a,
+                                                                 const value_t* b,
+                                                                 const value_int n_dims)
+  {  // NOTE(review): virtual on a device functor; confirm it is never dispatched via vtable.
+    return -1;  // The base implementation ignores its arguments and always yields -1.
+  };
+};
+
+template <typename value_t, typename value_int = std::uint32_t>
+struct HaversineFunc : public DistFunc<value_t, value_int> {  // Wraps compute_haversine.
+  __device__ __host__ __forceinline__ value_t operator()(const value_t* a,
+                                                         const value_t* b,
+                                                         const value_int n_dims) override
+  {  // Only a[0], a[1], b[0] and b[1] are read; n_dims is not used here.
+    return raft::spatial::knn::detail::compute_haversine(a[0], b[0], a[1], b[1]);
+  }
+};
+
+template <typename value_t, typename value_int = std::uint32_t>
+struct EuclideanFunc : public DistFunc<value_t, value_int> {  // Euclidean (L2) distance functor.
+  __device__ __host__ __forceinline__ value_t operator()(const value_t* a,
+                                                         const value_t* b,
+                                                         const value_int n_dims) override
+  {  // Reads the first n_dims entries of both a and b.
+    value_t sum_sq = 0;
+    for (value_int i = 0; i < n_dims; ++i) {
+      value_t diff = a[i] - b[i];
+      sum_sq += diff * diff;  // Accumulate the squared per-coordinate difference.
+    }
+
+    return raft::sqrt(sum_sq);  // Root of the sum of squares, not the squared distance.
+  }
+};
+
+};  // namespace detail
+};  // namespace knn
+};  // namespace spatial
+};  // namespace raft
diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh
new file mode 100644
index 0000000000..390436939f
--- /dev/null
+++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cstddef>                           // size_t
+#include <cstdint>                           // uint32_t
+#include <raft/distance/distance_types.hpp>  // DistanceType
+#include <raft/util/raft_explicit.hpp>       // RAFT_EXPLICIT
+
+#if defined(RAFT_EXPLICIT_INSTANTIATE_ONLY)
+
+namespace raft::spatial::knn::detail {
+
+template <typename value_idx, typename value_t, bool usePrevTopKs = false>
+void fusedL2Knn(size_t D,  // Visible only under RAFT_EXPLICIT_INSTANTIATE_ONLY (enclosing #if).
+                value_idx* out_inds,
+                value_t* out_dists,
+                const value_t* index,
+                const value_t* query,
+                size_t n_index_rows,
+                size_t n_query_rows,
+                int k,
+                bool rowMajorIndex,
+                bool rowMajorQuery,
+                cudaStream_t stream,
+                raft::distance::DistanceType metric) RAFT_EXPLICIT;  // expansion not shown here
+
+}  // namespace raft::spatial::knn::detail
+
+#endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#define instantiate_raft_spatial_knn_detail_fusedL2Knn(Mvalue_idx, Mvalue_t, MusePrevTopKs) \
+  extern template void                                                                      \
+  raft::spatial::knn::detail::fusedL2Knn<Mvalue_idx, Mvalue_t, MusePrevTopKs>(              \
+    size_t D,                                                                               \
+    Mvalue_idx * out_inds,                                                                  \
+    Mvalue_t * out_dists,                                                                   \
+    const Mvalue_t* index,                                                                  \
+    const Mvalue_t* query,                                                                  \
+    size_t n_index_rows,                                                                    \
+    size_t n_query_rows,                                                                    \
+    int k,                                                                                  \
+    bool rowMajorIndex,                                                                     \
+    bool rowMajorQuery,                                                                     \
+    cudaStream_t stream,                                                                    \
+    raft::distance::DistanceType metric)
+
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int32_t, float, true);  // idx, val, usePrevTopKs
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int32_t, float, false);
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int64_t, float, true);
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int64_t, float, false);
+
+// extern template declarations for the uint32_t index type; these are used by brute_force_knn:
+instantiate_raft_spatial_knn_detail_fusedL2Knn(uint32_t, float, true);
+instantiate_raft_spatial_knn_detail_fusedL2Knn(uint32_t, float, false);
+
+#undef instantiate_raft_spatial_knn_detail_fusedL2Knn  // helper macro ends with this header
diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh
new file mode 100644
index 0000000000..4a571c1447
--- /dev/null
+++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh
@@ -0,0 +1,1040 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include <cub/cub.cuh>
+#include <limits>
+#include <raft/linalg/norm.cuh>
+#include <raft/neighbors/detail/faiss_select/Select.cuh>
+// TODO: Need to hide the PairwiseDistance class impl and expose to public API
+#include "processing.cuh"
+#include <raft/core/operators.hpp>
+#include <raft/distance/detail/distance.cuh>
+#include <raft/distance/detail/distance_ops/l2_exp.cuh>
+#include <raft/distance/detail/distance_ops/l2_unexp.cuh>
+#include <raft/distance/detail/pairwise_distance_base.cuh>
+#include <raft/util/cuda_utils.cuh>
+
+namespace raft {
+namespace spatial {
+namespace knn {
+namespace detail {
+
+// Refills each per-row warp queue from shDumpKV: Pair.key -> warpV[], Pair.value -> warpK[].
+template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT>
+DI void loadAllWarpQShmem(myWarpSelect** heapArr,
+                          Pair* shDumpKV,
+                          const IdxT m,
+                          const unsigned int numOfNN)
+{
+  const int lane = raft::laneId();
+#pragma unroll
+  for (int r = 0; r < Policy::AccRowsPerTh; ++r) {
+    const auto tileRow = (threadIdx.x / Policy::AccThCols) + r * Policy::AccThRows;
+    if (!(tileRow < m)) { continue; }
+    myWarpSelect* heap = heapArr[r];
+#pragma unroll
+    for (int q = 0; q < myWarpSelect::kNumWarpQRegisters; ++q) {
+      const int slot = q * warpSize + lane;
+      if (!(slot < numOfNN)) { continue; }
+      const Pair entry = shDumpKV[tileRow * numOfNN + slot];
+      heap->warpV[q]   = entry.key;
+      heap->warpK[q]   = entry.value;
+    }
+  }
+}
+
+// Single-row load: fills one warp queue from row `rowId` of shDumpKV (key -> warpV, value -> warpK).
+template <typename Policy, typename Pair, typename myWarpSelect>
+DI void loadWarpQShmem(myWarpSelect* heapArr,
+                       Pair* shDumpKV,
+                       const int rowId,
+                       const unsigned int numOfNN)
+{
+  const int lane = raft::laneId();
+#pragma unroll
+  for (int q = 0; q < myWarpSelect::kNumWarpQRegisters; ++q) {
+    const int slot = q * warpSize + lane;
+    if (!(slot < numOfNN)) { continue; }
+    const Pair entry  = shDumpKV[rowId * numOfNN + slot];
+    heapArr->warpV[q] = entry.key;
+    heapArr->warpK[q] = entry.value;
+  }
+}
+
+// Writes one warp queue into row `rowId` of shDumpKV, packing slot j as Pair(warpV[j], warpK[j]).
+template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT>
+DI void storeWarpQShmem(myWarpSelect* heapArr,
+                        Pair* shDumpKV,
+                        const IdxT rowId,
+                        const unsigned int numOfNN)
+{
+  const int lane = raft::laneId();
+
+#pragma unroll
+  for (int q = 0; q < myWarpSelect::kNumWarpQRegisters; ++q) {
+    const int slot = q * warpSize + lane;
+    // Only the first numOfNN slots of the queue are stored.
+    if (!(slot < numOfNN)) { continue; }
+    shDumpKV[rowId * numOfNN + slot] = Pair(heapArr->warpV[q], heapArr->warpK[q]);
+  }
+}
+
+// Writes each row's warp queue to out_dists (warpK) and out_inds (warpV); rows >= m are skipped.
+template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT, typename OutT>
+DI void storeWarpQGmem(myWarpSelect** heapArr,
+                       volatile OutT* out_dists,
+                       volatile IdxT* out_inds,
+                       const IdxT m,
+                       const unsigned int numOfNN,
+                       const IdxT starty)
+{
+  const int lane = raft::laneId();
+#pragma unroll
+  for (int r = 0; r < Policy::AccRowsPerTh; ++r) {
+    const auto outRow = starty + r * Policy::AccThRows;
+    if (!(outRow < m)) { continue; }
+#pragma unroll
+    for (int q = 0; q < myWarpSelect::kNumWarpQRegisters; ++q) {
+      const auto slot = q * warpSize + lane;
+      if (!(slot < numOfNN)) { continue; }
+      const auto offset = std::size_t(outRow) * numOfNN + slot;
+      out_dists[offset] = heapArr[r]->warpK[q];
+      out_inds[offset]  = (IdxT)heapArr[r]->warpV[q];
+    }
+  }
+}
+
+template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT, typename OutT>
+DI void loadPrevTopKsGmemWarpQ(myWarpSelect** heapArr,  // one queue per accumulator row (index i)
+                               volatile OutT* out_dists,  // read into warpK
+                               volatile IdxT* out_inds,  // read into warpV (cast to uint32_t)
+                               const IdxT m,  // row bound: rows >= m are skipped
+                               const unsigned int numOfNN,  // entries per output row
+                               const IdxT starty)  // output row handled for i == 0
+{  // Seeds each row's queue from what is already in out_dists/out_inds, then refreshes warpKTop.
+  const int lid = raft::laneId();
+#pragma unroll
+  for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+    const auto gmemRowId = starty + i * Policy::AccThRows;
+    if (gmemRowId < m) {
+#pragma unroll
+      for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
+        const auto idx = j * warpSize + lid;
+        if (idx < numOfNN) {  // the row offset below is computed in size_t, not in IdxT
+          heapArr[i]->warpK[j] = out_dists[std::size_t(gmemRowId) * numOfNN + idx];
+          heapArr[i]->warpV[j] = (uint32_t)out_inds[std::size_t(gmemRowId) * numOfNN + idx];
+        }
+      }
+      static constexpr auto kLaneWarpKTop = myWarpSelect::kNumWarpQRegisters - 1;  // last queue register
+      heapArr[i]->warpKTop = raft::shfl(heapArr[i]->warpK[kLaneWarpKTop], heapArr[i]->kLane);  // presumably a broadcast from lane kLane; confirm raft::shfl semantics
+    }
+  }
+}
+
+template <typename Pair, int NumWarpQRegs, typename myWarpSelect>
+DI void updateSortedWarpQ(  // inserts staged candidates into a warp queue (assumed sorted ascending across lanes)
+  myWarpSelect& heapArr, Pair* allWarpTopKs, int rowId, int finalNumVals, int startId = 0)
+{  // heapArr is dereferenced with `->` below, so myWarpSelect is a pointer type at the call site
+  constexpr uint32_t mask = 0xffffffffu;
+  const int lid           = raft::laneId();
+  // calculate srcLane such that tid 0 -> 31, 1 -> 0,... 31 -> 30.
+  // wrap around 0 to 31 required for NN > 32
+  const auto srcLane = (warpSize + (lid - 1)) & (warpSize - 1);
+
+  for (int k = startId; k < finalNumVals; k++) {  // one candidate per iteration
+    Pair KVPair = allWarpTopKs[rowId * (256) + k];  // each row owns a fixed 256-entry segment
+#pragma unroll
+    for (int i = 0; i < NumWarpQRegs; i++) {
+      unsigned activeLanes = __ballot_sync(mask, KVPair.value < heapArr->warpK[i]);
+      if (activeLanes) {  // at least one lane holds a value larger than the candidate
+        Pair tempKV;  // entry of lane srcLane, fetched before any slot is overwritten (assumes raft::shfl(v, l) reads v from lane l)
+        tempKV.value               = raft::shfl(heapArr->warpK[i], srcLane);
+        tempKV.key                 = raft::shfl(heapArr->warpV[i], srcLane);
+        const auto firstActiveLane = __ffs(activeLanes) - 1;  // lowest such lane = insertion slot
+        if (firstActiveLane == lid) {
+          heapArr->warpK[i] = KVPair.value;
+          heapArr->warpV[i] = KVPair.key;
+        } else if (lid > firstActiveLane) {  // lanes after the slot take their left neighbour's entry
+          heapArr->warpK[i] = tempKV.value;
+          heapArr->warpV[i] = tempKV.key;
+        }
+        if (i == 0 && NumWarpQRegs > 1) {  // push the entry shifted out of register 0 into register 1
+          heapArr->warpK[1] = __shfl_up_sync(mask, heapArr->warpK[1], 1);
+          heapArr->warpV[1] = __shfl_up_sync(mask, heapArr->warpV[1], 1);
+          if (lid == 0) {  // lane 0's tempKV was read from lane 31 of register 0
+            heapArr->warpK[1] = tempKV.value;
+            heapArr->warpV[1] = tempKV.key;
+          }
+          break;  // stop scanning further registers for this candidate
+        }
+      }
+    }
+  }
+}
+
+// Fused L2-distance + k-selection kernel; the dense distance matrix is not written (write_out is
+// false). The PairwiseDistances object built at the end of the body receives two callbacks:
+//  - epilog_lambda(acc tile, ...): folds one accumulator tile into the per-row candidate lists
+//    (numOfNN Pair entries per row) kept in dynamic shared memory after OpT's own region;
+//  - rowEpilog_lambda(gridStrideY): when gridDim.x > 1, blocks with blockIdx.x != 0 publish
+//    their lists via out_dists/out_inds under mutexes[] and block 0 merges them into the result.
+// Smaller distances win (a candidate is kept when acc < warpKTop); keys are uint32_t indices.
+template <typename DataT, typename OutT, typename IdxT, typename Policy, typename OpT,
+          typename FinalLambda, int NumWarpQ, int NumThreadQ,
+          bool usePrevTopKs = false, bool isRowMajor = true>
+__global__ __launch_bounds__(Policy::Nthreads, 2) void fusedL2kNN(const DataT* x,
+                                                                  const DataT* y,
+                                                                  const DataT* _xn,
+                                                                  const DataT* _yn,
+                                                                  const IdxT m,  // rows of out_dists/out_inds
+                                                                  const IdxT n,
+                                                                  const IdxT k,
+                                                                  const IdxT lda,
+                                                                  const IdxT ldb,
+                                                                  const IdxT ldd,  // column bound for acc
+                                                                  OpT distance_op,
+                                                                  FinalLambda fin_op,
+                                                                  unsigned int numOfNN,  // entries kept per row
+                                                                  volatile int* mutexes,  // one per Mblk-row block
+                                                                  volatile OutT* out_dists,  // [m x numOfNN]
+                                                                  volatile IdxT* out_inds)  // [m x numOfNN]
+{
+  using AccT = typename OpT::AccT;
+  extern __shared__ char smem[];
+  // Candidate entry: key = uint32_t index, value = AccT distance.
+  typedef cub::KeyValuePair<uint32_t, AccT> Pair;
+  constexpr auto identity = std::numeric_limits<AccT>::max();  // padding distance for unused slots
+  constexpr auto keyMax   = std::numeric_limits<uint32_t>::max();  // padding index for unused slots
+  constexpr auto Dir      = false;
+  using namespace raft::neighbors::detail::faiss_select;
+  typedef WarpSelect<AccT, uint32_t, Dir, Comparator<AccT>, NumWarpQ, NumThreadQ, 32> myWarpSelect;
+
+  auto rowEpilog_lambda =
+    [m, n, &distance_op, numOfNN, out_dists, out_inds, mutexes] __device__(IdxT gridStrideY) {
+      if (gridDim.x == 1) { return; }  // single block along x: epilog_lambda already stored the result
+
+      // Use ::template to disambiguate (See:
+      // https://en.cppreference.com/w/cpp/language/dependent_name)
+      int smem_offset = OpT::template shared_mem_size<Policy>();
+      Pair* shDumpKV  = (Pair*)(&smem[smem_offset]);
+
+      const int lid     = threadIdx.x % warpSize;
+      const IdxT starty = gridStrideY + (threadIdx.x / Policy::AccThCols);
+      // States of mutexes[gridStrideY / Policy::Mblk] (block 0 consumes, the other blocks produce):
+      //  0 -> consumer done consuming the buffer.
+      // -1 -> consumer started consuming the buffer
+      // -2 -> producer done filling the buffer
+      //  1 -> prod acquired to fill the buffer
+      if (blockIdx.x == 0) {
+        auto cta_processed = 0;
+        myWarpSelect heapArr1(identity, keyMax, numOfNN);
+        myWarpSelect heapArr2(identity, keyMax, numOfNN);
+        myWarpSelect* heapArr[] = {&heapArr1, &heapArr2};  // indexed by i < Policy::AccRowsPerTh
+        __syncwarp();
+
+        loadAllWarpQShmem<Policy, Pair>(heapArr, &shDumpKV[0], m, numOfNN);
+
+        while (cta_processed < gridDim.x - 1) {  // one hand-off per producer block
+          if (threadIdx.x == 0) {
+            while (atomicCAS((int*)&mutexes[gridStrideY / Policy::Mblk], -2, -1) != -2)
+              ;
+          }
+          __threadfence();
+          __syncthreads();
+          // Stage the list a producer published in out_dists/out_inds into shDumpKV.
+#pragma unroll
+          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+            const auto rowId = starty + i * Policy::AccThRows;
+            if (rowId < m) {
+#pragma unroll
+              for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
+                Pair otherKV;
+                otherKV.value  = identity;
+                otherKV.key    = keyMax;
+                const auto idx = j * warpSize + lid;
+                if (idx < numOfNN) {  // size_t offsets: rowId * numOfNN may not fit in 32 bits
+                  otherKV.value         = out_dists[std::size_t(rowId) * numOfNN + idx];
+                  otherKV.key           = (uint32_t)out_inds[std::size_t(rowId) * numOfNN + idx];
+                  const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
+                  shDumpKV[shMemRowId * numOfNN + idx] = otherKV;
+                }
+              }
+            }
+          }
+          __threadfence();
+          __syncthreads();
+          // Everything is staged: hand the global buffer back to the producers.
+          if (threadIdx.x == 0) { atomicExch((int*)&mutexes[gridStrideY / Policy::Mblk], 0); }
+          __threadfence();
+
+        // Perform merging of otherKV with topk's across warp.
+#pragma unroll
+          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+            const auto rowId = starty + i * Policy::AccThRows;
+            if (rowId < m) {
+#pragma unroll
+              for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
+                Pair otherKV;
+                otherKV.value  = identity;
+                otherKV.key    = keyMax;
+                const auto idx = j * warpSize + lid;
+                if (idx < numOfNN) {
+                  const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
+                  otherKV               = shDumpKV[shMemRowId * numOfNN + idx];
+                }
+                heapArr[i]->add(otherKV.value, otherKV.key);
+              }
+            }
+          }
+          cta_processed++;
+        }
+#pragma unroll
+        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+          const auto rowId = starty + i * Policy::AccThRows;
+          if (rowId < m) {
+            bool needSort = (heapArr[i]->numVals > 0);
+            needSort      = __any_sync(0xffffffff, needSort);
+            if (needSort) { heapArr[i]->reduce(); }
+          }
+        }
+        storeWarpQGmem<Policy, Pair>(heapArr, out_dists, out_inds, m, numOfNN, starty);
+      } else {
+        if (threadIdx.x == 0) {
+          while (atomicCAS((int*)&mutexes[gridStrideY / Policy::Mblk], 0, 1) != 0)
+            ;
+        }
+        __threadfence();
+        __syncthreads();
+        // Producer: publish this block's list through out_dists/out_inds (size_t row offsets).
+#pragma unroll
+        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+          const auto rowId = starty + i * Policy::AccThRows;
+          if (rowId < m) {
+            for (int idx = lid; idx < numOfNN; idx += warpSize) {
+              const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
+              Pair KVPair           = shDumpKV[shMemRowId * numOfNN + idx];
+              out_dists[std::size_t(rowId) * numOfNN + idx] = KVPair.value;
+              out_inds[std::size_t(rowId) * numOfNN + idx]  = (IdxT)KVPair.key;
+            }
+          }
+        }
+        __threadfence();
+        __syncthreads();
+        // Mark the buffer as filled so that block 0 can consume it.
+        if (threadIdx.x == 0) { atomicExch((int*)&mutexes[gridStrideY / Policy::Mblk], -2); }
+        __threadfence();
+      }
+    };
+
+  // epilogue operation lambda for final value calculation
+  auto epilog_lambda =
+    [&distance_op, numOfNN, m, n, ldd, out_dists, out_inds, keyMax, identity] __device__(
+      AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh],
+      DataT * regxn,
+      DataT * regyn,
+      IdxT gridStrideX,
+      IdxT gridStrideY) {
+      // Use ::template to disambiguate (See:
+      // https://en.cppreference.com/w/cpp/language/dependent_name)
+      int smem_offset = OpT::template shared_mem_size<Policy>();
+      Pair* shDumpKV  = (Pair*)(&smem[smem_offset]);
+
+      constexpr uint32_t mask = 0xffffffffu;
+      const IdxT starty       = gridStrideY + (threadIdx.x / Policy::AccThCols);
+      const IdxT startx       = gridStrideX + (threadIdx.x % Policy::AccThCols);
+      const int lid           = raft::laneId();
+
+      myWarpSelect heapArr1(identity, keyMax, numOfNN);
+      myWarpSelect heapArr2(identity, keyMax, numOfNN);
+      myWarpSelect* heapArr[] = {&heapArr1, &heapArr2};  // indexed by i < Policy::AccRowsPerTh
+      if (usePrevTopKs) {
+        if (gridStrideX == blockIdx.x * Policy::Nblk) {
+          loadPrevTopKsGmemWarpQ<Policy, Pair>(heapArr, out_dists, out_inds, m, numOfNN, starty);
+        }
+      }
+      // gridStrideX > blockIdx.x * Nblk: merge into the list in shDumpKV; otherwise build it anew.
+      if (gridStrideX > blockIdx.x * Policy::Nblk) {
+#pragma unroll
+        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+          const auto rowId     = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
+          Pair tempKV          = shDumpKV[(rowId * numOfNN) + numOfNN - 1];
+          heapArr[i]->warpKTop = tempKV.value;  // threshold = last stored entry of the row's list
+        }
+
+        // total vals can at most be 256 (32 * 8)
+        int numValsWarpTopK[Policy::AccRowsPerTh];
+        int anyWarpTopKs = 0;
+#pragma unroll
+        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+          const auto rowId   = starty + i * Policy::AccThRows;
+          numValsWarpTopK[i] = 0;
+          if (rowId < m) {
+#pragma unroll
+            for (int j = 0; j < Policy::AccColsPerTh; ++j) {
+              const auto colId = startx + j * Policy::AccThCols;
+              if (colId < ldd) {
+                if (acc[i][j] < heapArr[i]->warpKTop) { numValsWarpTopK[i]++; }
+              }
+            }
+            anyWarpTopKs += numValsWarpTopK[i];
+          }
+        }
+        anyWarpTopKs = __syncthreads_or(anyWarpTopKs > 0);  // does any thread in the block have a candidate?
+        if (anyWarpTopKs) {
+          Pair* allWarpTopKs = (Pair*)(&smem[0]);  // candidate staging area at the start of smem
+          uint32_t needScanSort[Policy::AccRowsPerTh];
+
+#pragma unroll
+          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+            const auto gmemRowId = starty + i * Policy::AccThRows;
+            needScanSort[i]      = 0;
+            if (gmemRowId < m) {
+              int myVals      = numValsWarpTopK[i];
+              needScanSort[i] = __ballot_sync(mask, myVals > 0);
+              if (needScanSort[i]) {
+#pragma unroll
+                for (unsigned int k = 1; k <= 16; k *= 2) {  // inclusive prefix sum across the warp
+                  const unsigned int n = __shfl_up_sync(mask, numValsWarpTopK[i], k);
+                  if (lid >= k) { numValsWarpTopK[i] += n; }
+                }
+              }
+              // As each thread will know its total vals to write.
+              // we only store its starting location.
+              numValsWarpTopK[i] -= myVals;
+            }
+
+            if (needScanSort[i]) {
+              const auto rowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
+              if (gmemRowId < m) {
+                if (needScanSort[i] & ((uint32_t)1 << lid)) {
+#pragma unroll
+                  for (int j = 0; j < Policy::AccColsPerTh; ++j) {
+                    const auto colId = startx + j * Policy::AccThCols;
+                    if (colId < ldd) {
+                      if (acc[i][j] < heapArr[i]->warpKTop) {
+                        Pair otherKV                                     = {colId, acc[i][j]};
+                        allWarpTopKs[rowId * (256) + numValsWarpTopK[i]] = otherKV;
+                        numValsWarpTopK[i]++;
+                      }
+                    }
+                  }
+                }
+                __syncwarp();
+                const int finalNumVals = raft::shfl(numValsWarpTopK[i], 31);  // lane 31's running count
+                loadWarpQShmem<Policy, Pair>(heapArr[i], &shDumpKV[0], rowId, numOfNN);
+                updateSortedWarpQ<Pair, myWarpSelect::kNumWarpQRegisters>(
+                  heapArr[i], &allWarpTopKs[0], rowId, finalNumVals);
+              }
+            }
+          }
+          __syncthreads();
+#pragma unroll
+          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+            if (needScanSort[i]) {
+              const auto rowId     = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
+              const auto gmemRowId = starty + i * Policy::AccThRows;
+              if (gmemRowId < m) {
+                storeWarpQShmem<Policy, Pair>(heapArr[i], shDumpKV, rowId, numOfNN);
+              }
+            }
+          }
+        }
+      } else {
+#pragma unroll
+        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
+          const auto gmemRowId  = starty + i * Policy::AccThRows;
+          const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
+          if (gmemRowId < m) {
+#pragma unroll
+            for (int j = 0; j < Policy::AccColsPerTh; ++j) {
+              const auto colId = startx + j * Policy::AccThCols;
+              Pair otherKV     = {keyMax, identity};
+              if (colId < ldd) {
+                otherKV.value = acc[i][j];
+                otherKV.key   = colId;
+              }
+              heapArr[i]->add(otherKV.value, otherKV.key);
+            }
+
+            bool needSort = (heapArr[i]->numVals > 0);
+            needSort      = __any_sync(mask, needSort);
+            if (needSort) { heapArr[i]->reduce(); }
+            storeWarpQShmem<Policy, Pair>(heapArr[i], shDumpKV, shMemRowId, numOfNN);
+          }
+        }
+      }
+
+      if (((gridStrideX + Policy::Nblk * gridDim.x) >= n) && gridDim.x == 1) {
+        // This is last iteration of grid stride X
+        loadAllWarpQShmem<Policy, Pair>(heapArr, &shDumpKV[0], m, numOfNN);
+        storeWarpQGmem<Policy, Pair>(heapArr, out_dists, out_inds, m, numOfNN, starty);
+      }
+    };
+
+  constexpr bool write_out = false;
+  raft::distance::detail::PairwiseDistances<DataT,
+                                            OutT,
+                                            IdxT,
+                                            Policy,
+                                            OpT,
+                                            decltype(epilog_lambda),
+                                            FinalLambda,
+                                            decltype(rowEpilog_lambda),
+                                            isRowMajor,
+                                            write_out>
+    obj(x,
+        y,
+        m,
+        n,
+        k,
+        lda,
+        ldb,
+        ldd,
+        _xn,
+        _yn,
+        nullptr,  // output ptr, can be null as write_out == false.
+        smem,
+        distance_op,
+        epilog_lambda,
+        fin_op,
+        rowEpilog_lambda);
+  obj.run();
+}
+
+// Host launcher for fusedL2kNN with the unexpanded L2 op (l2_unexp_distance_op; no norms passed).
+// Supports numOfNN <= 64 and row-major inputs only. When the launch uses more than one block
+// along x, `workspace` must hold one int32 mutex per Mblk-row block: if it is null or `worksize`
+// is too small, the required byte count is written to `worksize` and the function returns
+// WITHOUT launching the kernel, so callers have to re-check `worksize` after the call.
+template <typename DataT, typename AccT, typename OutT, typename IdxT, int VecLen,
+          bool usePrevTopKs, bool isRowMajor>
+void fusedL2UnexpKnnImpl(const DataT* x,
+                         const DataT* y,
+                         IdxT m,
+                         IdxT n,
+                         IdxT k,
+                         IdxT lda,
+                         IdxT ldb,
+                         IdxT ldd,
+                         bool sqrt,
+                         OutT* out_dists,
+                         IdxT* out_inds,
+                         IdxT numOfNN,
+                         cudaStream_t stream,
+                         void* workspace,
+                         size_t& worksize)
+{
+  typedef typename raft::linalg::Policy2x8<DataT, 1>::Policy RowPolicy;
+  typedef typename raft::linalg::Policy4x4<DataT, VecLen>::ColPolicy ColPolicy;
+  // The condition is hard-wired to true, so KPolicy is always RowPolicy (ColPolicy is unused).
+  typedef typename std::conditional<true, RowPolicy, ColPolicy>::type KPolicy;
+
+  ASSERT(isRowMajor, "Only Row major inputs are allowed");
+
+  dim3 blk(KPolicy::Nthreads);
+  // Per-neighbour (index, distance) entry; only its size is needed here (shared-memory budget).
+  typedef cub::KeyValuePair<uint32_t, AccT> Pair;
+
+  raft::distance::detail::ops::l2_unexp_distance_op<DataT, AccT, IdxT> distance_op{sqrt};
+  raft::identity_op fin_op{};
+
+  if constexpr (isRowMajor) {
+    constexpr auto fusedL2UnexpKnn32RowMajor = fusedL2kNN<DataT,
+                                                          OutT,
+                                                          IdxT,
+                                                          KPolicy,
+                                                          decltype(distance_op),
+                                                          decltype(fin_op),
+                                                          32,
+                                                          2,
+                                                          usePrevTopKs,
+                                                          isRowMajor>;
+    constexpr auto fusedL2UnexpKnn64RowMajor = fusedL2kNN<DataT,
+                                                          OutT,
+                                                          IdxT,
+                                                          KPolicy,
+                                                          decltype(distance_op),
+                                                          decltype(fin_op),
+                                                          64,
+                                                          3,
+                                                          usePrevTopKs,
+                                                          isRowMajor>;
+
+    auto fusedL2UnexpKnnRowMajor = fusedL2UnexpKnn32RowMajor;
+    if (numOfNN <= 32) {
+      fusedL2UnexpKnnRowMajor = fusedL2UnexpKnn32RowMajor;
+    } else if (numOfNN <= 64) {
+      fusedL2UnexpKnnRowMajor = fusedL2UnexpKnn64RowMajor;
+    } else {
+      ASSERT(numOfNN <= 64, "fusedL2kNN: num of nearest neighbors must be <= 64");
+    }
+
+    const auto sharedMemSize =
+      distance_op.template shared_mem_size<KPolicy>() + KPolicy::Mblk * numOfNN * sizeof(Pair);
+
+    dim3 grid = raft::distance::detail::launchConfigGenerator<KPolicy>(
+      m, n, sharedMemSize, fusedL2UnexpKnnRowMajor);
+
+    if (grid.x > 1) {  // the kernel's cross-block merge path needs zero-initialised mutexes
+      const auto numMutexes = raft::ceildiv<size_t>(static_cast<size_t>(m), KPolicy::Mblk);
+      if (workspace == nullptr || worksize < (sizeof(int32_t) * numMutexes)) {
+        worksize = sizeof(int32_t) * numMutexes;  // report the requirement; nothing is launched
+        return;
+      } else {
+        RAFT_CUDA_TRY(cudaMemsetAsync(workspace, 0, sizeof(int32_t) * numMutexes, stream));
+      }
+    }
+
+    fusedL2UnexpKnnRowMajor<<<grid, blk, sharedMemSize, stream>>>(x,
+                                                                  y,
+                                                                  nullptr,
+                                                                  nullptr,
+                                                                  m,
+                                                                  n,
+                                                                  k,
+                                                                  lda,
+                                                                  ldb,
+                                                                  ldd,
+                                                                  distance_op,
+                                                                  fin_op,
+                                                                  (uint32_t)numOfNN,
+                                                                  (int*)workspace,
+                                                                  out_dists,
+                                                                  out_inds);
+  }
+  // Column-major inputs are rejected by the ASSERT above, so there is no other branch.
+
+  RAFT_CUDA_TRY(cudaGetLastError());
+}
+
+// Chooses the VecLen template argument of fusedL2UnexpKnnImpl from the byte pitch of both inputs:
+// 16-byte multiples -> 16 / sizeof(DataT), 8-byte multiples -> 8 / sizeof(DataT), otherwise 1.
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          bool usePrevTopKs, bool isRowMajor>
+void fusedL2UnexpKnn(IdxT m,
+                     IdxT n,
+                     IdxT k,
+                     IdxT lda,
+                     IdxT ldb,
+                     IdxT ldd,
+                     const DataT* x,
+                     const DataT* y,
+                     bool sqrt,
+                     OutT* out_dists,
+                     IdxT* out_inds,
+                     IdxT numOfNN,
+                     cudaStream_t stream,
+                     void* workspace,
+                     size_t& worksize)
+{
+  const size_t pitchX = sizeof(DataT) * lda;
+  const size_t pitchY = sizeof(DataT) * ldb;
+  const bool fits16   = (16 % sizeof(DataT) == 0) && (pitchX % 16 == 0) && (pitchY % 16 == 0);
+  const bool fits8    = (8 % sizeof(DataT) == 0) && (pitchX % 8 == 0) && (pitchY % 8 == 0);
+  if (fits16) {
+    fusedL2UnexpKnnImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), usePrevTopKs, isRowMajor>(
+      x,
+      y,
+      m,
+      n,
+      k,
+      lda,
+      ldb,
+      ldd,
+      sqrt,
+      out_dists,
+      out_inds,
+      numOfNN,
+      stream,
+      workspace,
+      worksize);
+  } else if (fits8) {
+    fusedL2UnexpKnnImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), usePrevTopKs, isRowMajor>(
+      x,
+      y,
+      m,
+      n,
+      k,
+      lda,
+      ldb,
+      ldd,
+      sqrt,
+      out_dists,
+      out_inds,
+      numOfNN,
+      stream,
+      workspace,
+      worksize);
+  } else {
+    fusedL2UnexpKnnImpl<DataT, AccT, OutT, IdxT, 1, usePrevTopKs, isRowMajor>(x,
+                                                                              y,
+                                                                              m,
+                                                                              n,
+                                                                              k,
+                                                                              lda,
+                                                                              ldb,
+                                                                              ldd,
+                                                                              sqrt,
+                                                                              out_dists,
+                                                                              out_inds,
+                                                                              numOfNN,
+                                                                              stream,
+                                                                              workspace,
+                                                                              worksize);
+  }
+}
+
+template <typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT,
+          int VecLen,
+          bool usePrevTopKs,
+          bool isRowMajor>
+void fusedL2ExpKnnImpl(const DataT* x,
+                       const DataT* y,
+                       IdxT m,
+                       IdxT n,
+                       IdxT k,
+                       IdxT lda,
+                       IdxT ldb,
+                       IdxT ldd,
+                       bool sqrt,
+                       OutT* out_dists,
+                       IdxT* out_inds,
+                       IdxT numOfNN,
+                       cudaStream_t stream,
+                       void* workspace,
+                       size_t& worksize)
+{
+  typedef typename raft::linalg::Policy2x8<DataT, 1>::Policy RowPolicy;
+  typedef typename raft::linalg::Policy4x4<DataT, VecLen>::ColPolicy ColPolicy;
+
+  typedef typename std::conditional<true, RowPolicy, ColPolicy>::type KPolicy;
+
+  ASSERT(isRowMajor, "Only Row major inputs are allowed");
+
+  ASSERT(!(((x != y) && (worksize < (m + n) * sizeof(AccT))) || (worksize < m * sizeof(AccT))),
+         "workspace size error");
+  ASSERT(workspace != nullptr, "workspace is null");
+
+  dim3 blk(KPolicy::Nthreads);
+
+  typedef cub::KeyValuePair<uint32_t, AccT> Pair;
+
+  raft::distance::detail::ops::l2_exp_distance_op<DataT, AccT, IdxT> distance_op{sqrt};
+  raft::identity_op fin_op{};
+
+  if constexpr (isRowMajor) {
+    constexpr auto fusedL2ExpKnn32RowMajor = fusedL2kNN<DataT,
+                                                        OutT,
+                                                        IdxT,
+                                                        KPolicy,
+                                                        decltype(distance_op),
+                                                        decltype(fin_op),
+                                                        32,
+                                                        2,
+                                                        usePrevTopKs,
+                                                        isRowMajor>;
+    constexpr auto fusedL2ExpKnn64RowMajor = fusedL2kNN<DataT,
+                                                        OutT,
+                                                        IdxT,
+                                                        KPolicy,
+                                                        decltype(distance_op),
+                                                        decltype(fin_op),
+                                                        64,
+                                                        3,
+                                                        usePrevTopKs,
+                                                        isRowMajor>;
+
+    auto fusedL2ExpKnnRowMajor = fusedL2ExpKnn32RowMajor;
+    if (numOfNN <= 32) {
+      fusedL2ExpKnnRowMajor = fusedL2ExpKnn32RowMajor;
+    } else if (numOfNN <= 64) {
+      fusedL2ExpKnnRowMajor = fusedL2ExpKnn64RowMajor;
+    } else {
+      ASSERT(numOfNN <= 64, "fusedL2kNN: num of nearest neighbors must be <= 64");
+    }
+
+    const auto sharedMemSize =
+      distance_op.template shared_mem_size<KPolicy>() + (KPolicy::Mblk * numOfNN * sizeof(Pair));
+    dim3 grid = raft::distance::detail::launchConfigGenerator<KPolicy>(
+      m, n, sharedMemSize, fusedL2ExpKnnRowMajor);
+    int32_t* mutexes = nullptr;
+    if (grid.x > 1) {
+      const auto numMutexes   = raft::ceildiv<int>(m, KPolicy::Mblk);
+      const auto normsSize    = (x != y) ? (m + n) * sizeof(DataT) : n * sizeof(DataT);
+      const auto requiredSize = sizeof(int32_t) * numMutexes + normsSize;
+      if (worksize < requiredSize) {
+        worksize = requiredSize;
+        return;
+      } else {
+        mutexes = (int32_t*)((char*)workspace + normsSize);
+        RAFT_CUDA_TRY(cudaMemsetAsync(mutexes, 0, sizeof(int32_t) * numMutexes, stream));
+      }
+    }
+
+    DataT* xn = (DataT*)workspace;
+    DataT* yn = (DataT*)workspace;
+
+    if (x != y) {
+      yn += m;
+      raft::linalg::rowNorm(
+        xn, x, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{});
+      raft::linalg::rowNorm(
+        yn, y, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{});
+    } else {
+      raft::linalg::rowNorm(
+        xn, x, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{});
+    }
+    fusedL2ExpKnnRowMajor<<<grid, blk, sharedMemSize, stream>>>(x,
+                                                                y,
+                                                                xn,
+                                                                yn,
+                                                                m,
+                                                                n,
+                                                                k,
+                                                                lda,
+                                                                ldb,
+                                                                ldd,
+                                                                distance_op,
+                                                                fin_op,
+                                                                (uint32_t)numOfNN,
+                                                                mutexes,
+                                                                out_dists,
+                                                                out_inds);
+  } else {
+  }
+
+  RAFT_CUDA_TRY(cudaGetLastError());
+}
+
+template <typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT,
+          bool usePrevTopKs,
+          bool isRowMajor>
+void fusedL2ExpKnn(IdxT m,  // entry point: picks a fusedL2ExpKnnImpl instantiation from lda/ldb
+                   IdxT n,
+                   IdxT k,
+                   IdxT lda,  // scaled by sizeof(DataT) below to obtain bytesA
+                   IdxT ldb,  // scaled by sizeof(DataT) below to obtain bytesB
+                   IdxT ldd,
+                   const DataT* x,
+                   const DataT* y,
+                   bool sqrt,
+                   OutT* out_dists,
+                   IdxT* out_inds,
+                   IdxT numOfNN,
+                   cudaStream_t stream,
+                   void* workspace,
+                   size_t& worksize)  // in/out: the Impl may raise it and return before launching
+{
+  size_t bytesA = sizeof(DataT) * lda;  // lda expressed in bytes
+  size_t bytesB = sizeof(DataT) * ldb;  // ldb expressed in bytes
+  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
+    fusedL2ExpKnnImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), usePrevTopKs, isRowMajor>(
+      x,  // 16-byte case: the 5th template argument is 16 / sizeof(DataT)
+      y,  // NOTE(review): that argument is presumably a vector width - confirm in the Impl
+      m,
+      n,
+      k,
+      lda,
+      ldb,
+      ldd,
+      sqrt,
+      out_dists,
+      out_inds,
+      numOfNN,
+      stream,
+      workspace,
+      worksize);
+  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
+    fusedL2ExpKnnImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), usePrevTopKs, isRowMajor>(
+      x,  // 8-byte case: the 5th template argument is 8 / sizeof(DataT)
+      y,
+      m,
+      n,
+      k,
+      lda,
+      ldb,
+      ldd,
+      sqrt,
+      out_dists,
+      out_inds,
+      numOfNN,
+      stream,
+      workspace,
+      worksize);
+  } else {  // fallback when neither byte-size test passes: the 5th template argument is 1
+    fusedL2ExpKnnImpl<DataT, AccT, OutT, IdxT, 1, usePrevTopKs, isRowMajor>(x,
+                                                                            y,
+                                                                            m,
+                                                                            n,
+                                                                            k,
+                                                                            lda,
+                                                                            ldb,
+                                                                            ldd,
+                                                                            sqrt,
+                                                                            out_dists,
+                                                                            out_inds,
+                                                                            numOfNN,
+                                                                            stream,
+                                                                            workspace,
+                                                                            worksize);
+  }
+}
+
+/**
+ * Compute the k-nearest neighbors using L2 expanded/unexpanded distance.
+ *
+ * @tparam value_idx type of the output indices (also used for lda, ldb and ldd)
+ * @tparam value_t element type of the index/query data and of the output distances
+ * @param[out] out_inds output indices array on device (size n_query_rows * k)
+ * @param[out] out_dists output dists array on device (size n_query_rows * k)
+ * @param[in] index input index array on device (size n_index_rows * D)
+ * @param[in] query input query array on device (size n_query_rows * D)
+ * @param[in] n_index_rows number of rows in index array
+ * @param[in] n_query_rows number of rows in query array
+ * @param[in] k number of closest neighbors to return
+ * @param[in] rowMajorIndex is the index array in row-major layout? (must be true for now)
+ * @param[in] rowMajorQuery is the query array in row-major layout? (must equal rowMajorIndex)
+ * @param[in] stream stream to order kernel launch
+ */
+template <typename value_idx, typename value_t, bool usePrevTopKs = false>  // flag is forwarded
+void fusedL2Knn(size_t D,  // number of columns of index and query; also used as lda and ldb
+                value_idx* out_inds,
+                value_t* out_dists,
+                const value_t* index,
+                const value_t* query,
+                size_t n_index_rows,
+                size_t n_query_rows,
+                int k,
+                bool rowMajorIndex,
+                bool rowMajorQuery,
+                cudaStream_t stream,
+                raft::distance::DistanceType metric)  // L2 variant; others only print a message
+{
+  // Validate the input data
+  ASSERT(k > 0, "l2Knn: k must be > 0");
+  ASSERT(D > 0, "l2Knn: D must be > 0");
+  ASSERT(n_index_rows > 0, "l2Knn: n_index_rows must be > 0");
+  ASSERT(index, "l2Knn: index must be provided (passed null)");
+  ASSERT(n_query_rows > 0, "l2Knn: n_query_rows must be > 0");
+  ASSERT(query, "l2Knn: query must be provided (passed null)");
+  ASSERT(out_dists, "l2Knn: out_dists must be provided (passed null)");
+  ASSERT(out_inds, "l2Knn: out_inds must be provided (passed null)");
+  // Currently we only support the same layout for the x & y inputs.
+  ASSERT(rowMajorIndex == rowMajorQuery,
+         "l2Knn: rowMajorIndex and rowMajorQuery should have same layout");
+  // TODO: Add support for column major layout
+  ASSERT(rowMajorIndex == true, "l2Knn: only rowMajor inputs are supported for now.");
+
+  // Even for the L2 sqrt metrics we use the non-sqrt version, as FAISS bfKNN only supports the
+  // non-sqrt metric and some tests in RAFT/cuML (like Linkage) fail if we use L2 sqrt.
+  constexpr bool sqrt = false;
+
+  size_t worksize = 0, tempWorksize = 0;
+  rmm::device_uvector<char> workspace(worksize, stream);  // starts empty; resized per metric below
+  value_idx lda = D, ldb = D, ldd = n_index_rows;
+
+  switch (metric) {
+    case raft::distance::DistanceType::L2SqrtExpanded:
+    case raft::distance::DistanceType::L2Expanded:
+      tempWorksize = raft::distance::detail::
+        getWorkspaceSize<raft::distance::DistanceType::L2Expanded, float, float, float, value_idx>(
+          query, index, n_query_rows, n_index_rows, D);
+      worksize = tempWorksize;
+      workspace.resize(worksize, stream);
+      fusedL2ExpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
+                                                                              n_index_rows,
+                                                                              D,
+                                                                              lda,
+                                                                              ldb,
+                                                                              ldd,
+                                                                              query,
+                                                                              index,
+                                                                              sqrt,
+                                                                              out_dists,
+                                                                              out_inds,
+                                                                              k,
+                                                                              stream,
+                                                                              workspace.data(),
+                                                                              worksize);
+      if (worksize > tempWorksize) {  // callee raised worksize: grow the buffer and call again
+        workspace.resize(worksize, stream);
+        fusedL2ExpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
+                                                                                n_index_rows,
+                                                                                D,
+                                                                                lda,
+                                                                                ldb,
+                                                                                ldd,
+                                                                                query,
+                                                                                index,
+                                                                                sqrt,
+                                                                                out_dists,
+                                                                                out_inds,
+                                                                                k,
+                                                                                stream,
+                                                                                workspace.data(),
+                                                                                worksize);
+      }
+      break;
+    case raft::distance::DistanceType::L2Unexpanded:
+    case raft::distance::DistanceType::L2SqrtUnexpanded:
+      fusedL2UnexpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
+                                                                                n_index_rows,
+                                                                                D,
+                                                                                lda,
+                                                                                ldb,
+                                                                                ldd,
+                                                                                query,
+                                                                                index,
+                                                                                sqrt,
+                                                                                out_dists,
+                                                                                out_inds,
+                                                                                k,
+                                                                                stream,
+                                                                                workspace.data(),
+                                                                                worksize);
+      if (worksize) {  // first call left worksize nonzero: allocate that much and call again
+        workspace.resize(worksize, stream);
+        fusedL2UnexpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
+                                                                                  n_index_rows,
+                                                                                  D,
+                                                                                  lda,
+                                                                                  ldb,
+                                                                                  ldd,
+                                                                                  query,
+                                                                                  index,
+                                                                                  sqrt,
+                                                                                  out_dists,
+                                                                                  out_inds,
+                                                                                  k,
+                                                                                  stream,
+                                                                                  workspace.data(),
+                                                                                  worksize);
+      }
+      break;
+    default: printf("only L2 distance metric is supported\n"); break;  // no error is raised
+  };
+}
+
+}  // namespace detail
+}  // namespace knn
+}  // namespace spatial
+}  // namespace raft
diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh
index 4a571c1447..8cc02c7c78 100644
--- a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh
+++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh
@@ -14,1027 +14,11 @@
  * limitations under the License.
  */
 #pragma once
-#include <cub/cub.cuh>
-#include <limits>
-#include <raft/linalg/norm.cuh>
-#include <raft/neighbors/detail/faiss_select/Select.cuh>
-// TODO: Need to hide the PairwiseDistance class impl and expose to public API
-#include "processing.cuh"
-#include <raft/core/operators.hpp>
-#include <raft/distance/detail/distance.cuh>
-#include <raft/distance/detail/distance_ops/l2_exp.cuh>
-#include <raft/distance/detail/distance_ops/l2_unexp.cuh>
-#include <raft/distance/detail/pairwise_distance_base.cuh>
-#include <raft/util/cuda_utils.cuh>
 
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "fused_l2_knn-inl.cuh"
+#endif
 
-template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT>
-DI void loadAllWarpQShmem(myWarpSelect** heapArr,
-                          Pair* shDumpKV,
-                          const IdxT m,
-                          const unsigned int numOfNN)
-{
-  const int lid = raft::laneId();
-#pragma unroll
-  for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-    const auto rowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-    if (rowId < m) {
-#pragma unroll
-      for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
-        const int idx = j * warpSize + lid;
-        if (idx < numOfNN) {
-          Pair KVPair          = shDumpKV[rowId * numOfNN + idx];
-          heapArr[i]->warpV[j] = KVPair.key;
-          heapArr[i]->warpK[j] = KVPair.value;
-        }
-      }
-    }
-  }
-}
-
-template <typename Policy, typename Pair, typename myWarpSelect>
-DI void loadWarpQShmem(myWarpSelect* heapArr,
-                       Pair* shDumpKV,
-                       const int rowId,
-                       const unsigned int numOfNN)
-{
-  const int lid = raft::laneId();
-#pragma unroll
-  for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
-    const int idx = j * warpSize + lid;
-    if (idx < numOfNN) {
-      Pair KVPair       = shDumpKV[rowId * numOfNN + idx];
-      heapArr->warpV[j] = KVPair.key;
-      heapArr->warpK[j] = KVPair.value;
-    }
-  }
-}
-
-template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT>
-DI void storeWarpQShmem(myWarpSelect* heapArr,
-                        Pair* shDumpKV,
-                        const IdxT rowId,
-                        const unsigned int numOfNN)
-{
-  const int lid = raft::laneId();
-
-#pragma unroll
-  for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
-    const int idx = j * warpSize + lid;
-    if (idx < numOfNN) {
-      Pair otherKV                    = Pair(heapArr->warpV[j], heapArr->warpK[j]);
-      shDumpKV[rowId * numOfNN + idx] = otherKV;
-    }
-  }
-}
-
-template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT, typename OutT>
-DI void storeWarpQGmem(myWarpSelect** heapArr,
-                       volatile OutT* out_dists,
-                       volatile IdxT* out_inds,
-                       const IdxT m,
-                       const unsigned int numOfNN,
-                       const IdxT starty)
-{
-  const int lid = raft::laneId();
-#pragma unroll
-  for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-    const auto gmemRowId = starty + i * Policy::AccThRows;
-    if (gmemRowId < m) {
-#pragma unroll
-      for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
-        const auto idx = j * warpSize + lid;
-        if (idx < numOfNN) {
-          out_dists[std::size_t(gmemRowId) * numOfNN + idx] = heapArr[i]->warpK[j];
-          out_inds[std::size_t(gmemRowId) * numOfNN + idx]  = (IdxT)heapArr[i]->warpV[j];
-        }
-      }
-    }
-  }
-}
-
-template <typename Policy, typename Pair, typename myWarpSelect, typename IdxT, typename OutT>
-DI void loadPrevTopKsGmemWarpQ(myWarpSelect** heapArr,
-                               volatile OutT* out_dists,
-                               volatile IdxT* out_inds,
-                               const IdxT m,
-                               const unsigned int numOfNN,
-                               const IdxT starty)
-{
-  const int lid = raft::laneId();
-#pragma unroll
-  for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-    const auto gmemRowId = starty + i * Policy::AccThRows;
-    if (gmemRowId < m) {
-#pragma unroll
-      for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
-        const auto idx = j * warpSize + lid;
-        if (idx < numOfNN) {
-          heapArr[i]->warpK[j] = out_dists[std::size_t(gmemRowId) * numOfNN + idx];
-          heapArr[i]->warpV[j] = (uint32_t)out_inds[std::size_t(gmemRowId) * numOfNN + idx];
-        }
-      }
-      static constexpr auto kLaneWarpKTop = myWarpSelect::kNumWarpQRegisters - 1;
-      heapArr[i]->warpKTop = raft::shfl(heapArr[i]->warpK[kLaneWarpKTop], heapArr[i]->kLane);
-    }
-  }
-}
-
-template <typename Pair, int NumWarpQRegs, typename myWarpSelect>
-DI void updateSortedWarpQ(
-  myWarpSelect& heapArr, Pair* allWarpTopKs, int rowId, int finalNumVals, int startId = 0)
-{
-  constexpr uint32_t mask = 0xffffffffu;
-  const int lid           = raft::laneId();
-  // calculate srcLane such that tid 0 -> 31, 1 -> 0,... 31 -> 30.
-  // warp around 0 to 31 required for NN > 32
-  const auto srcLane = (warpSize + (lid - 1)) & (warpSize - 1);
-
-  for (int k = startId; k < finalNumVals; k++) {
-    Pair KVPair = allWarpTopKs[rowId * (256) + k];
-#pragma unroll
-    for (int i = 0; i < NumWarpQRegs; i++) {
-      unsigned activeLanes = __ballot_sync(mask, KVPair.value < heapArr->warpK[i]);
-      if (activeLanes) {
-        Pair tempKV;
-        tempKV.value               = raft::shfl(heapArr->warpK[i], srcLane);
-        tempKV.key                 = raft::shfl(heapArr->warpV[i], srcLane);
-        const auto firstActiveLane = __ffs(activeLanes) - 1;
-        if (firstActiveLane == lid) {
-          heapArr->warpK[i] = KVPair.value;
-          heapArr->warpV[i] = KVPair.key;
-        } else if (lid > firstActiveLane) {
-          heapArr->warpK[i] = tempKV.value;
-          heapArr->warpV[i] = tempKV.key;
-        }
-        if (i == 0 && NumWarpQRegs > 1) {
-          heapArr->warpK[1] = __shfl_up_sync(mask, heapArr->warpK[1], 1);
-          heapArr->warpV[1] = __shfl_up_sync(mask, heapArr->warpV[1], 1);
-          if (lid == 0) {
-            heapArr->warpK[1] = tempKV.value;
-            heapArr->warpV[1] = tempKV.key;
-          }
-          break;
-        }
-      }
-    }
-  }
-}
-
-template <typename DataT,
-          typename OutT,
-          typename IdxT,
-          typename Policy,
-          typename OpT,
-          typename FinalLambda,
-          int NumWarpQ,
-          int NumThreadQ,
-          bool usePrevTopKs = false,
-          bool isRowMajor   = true>
-__global__ __launch_bounds__(Policy::Nthreads, 2) void fusedL2kNN(const DataT* x,
-                                                                  const DataT* y,
-                                                                  const DataT* _xn,
-                                                                  const DataT* _yn,
-                                                                  const IdxT m,
-                                                                  const IdxT n,
-                                                                  const IdxT k,
-                                                                  const IdxT lda,
-                                                                  const IdxT ldb,
-                                                                  const IdxT ldd,
-                                                                  OpT distance_op,
-                                                                  FinalLambda fin_op,
-                                                                  unsigned int numOfNN,
-                                                                  volatile int* mutexes,
-                                                                  volatile OutT* out_dists,
-                                                                  volatile IdxT* out_inds)
-{
-  using AccT = typename OpT::AccT;
-  extern __shared__ char smem[];
-
-  typedef cub::KeyValuePair<uint32_t, AccT> Pair;
-  constexpr auto identity = std::numeric_limits<AccT>::max();
-  constexpr auto keyMax   = std::numeric_limits<uint32_t>::max();
-  constexpr auto Dir      = false;
-  using namespace raft::neighbors::detail::faiss_select;
-  typedef WarpSelect<AccT, uint32_t, Dir, Comparator<AccT>, NumWarpQ, NumThreadQ, 32> myWarpSelect;
-
-  auto rowEpilog_lambda =
-    [m, n, &distance_op, numOfNN, out_dists, out_inds, mutexes] __device__(IdxT gridStrideY) {
-      if (gridDim.x == 1) { return; }
-
-      // Use ::template to disambiguate (See:
-      // https://en.cppreference.com/w/cpp/language/dependent_name)
-      int smem_offset = OpT::template shared_mem_size<Policy>();
-      Pair* shDumpKV  = (Pair*)(&smem[smem_offset]);
-
-      const int lid     = threadIdx.x % warpSize;
-      const IdxT starty = gridStrideY + (threadIdx.x / Policy::AccThCols);
-
-      //  0 -> consumer done consuming the buffer.
-      // -1 -> consumer started consuming the buffer
-      // -2 -> producer done filling the buffer
-      //  1 -> prod acquired to fill the buffer
-      if (blockIdx.x == 0) {
-        auto cta_processed = 0;
-        myWarpSelect heapArr1(identity, keyMax, numOfNN);
-        myWarpSelect heapArr2(identity, keyMax, numOfNN);
-        myWarpSelect* heapArr[] = {&heapArr1, &heapArr2};
-        __syncwarp();
-
-        loadAllWarpQShmem<Policy, Pair>(heapArr, &shDumpKV[0], m, numOfNN);
-
-        while (cta_processed < gridDim.x - 1) {
-          if (threadIdx.x == 0) {
-            while (atomicCAS((int*)&mutexes[gridStrideY / Policy::Mblk], -2, -1) != -2)
-              ;
-          }
-          __threadfence();
-          __syncthreads();
-
-#pragma unroll
-          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-            const auto rowId = starty + i * Policy::AccThRows;
-            if (rowId < m) {
-#pragma unroll
-              for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
-                Pair otherKV;
-                otherKV.value  = identity;
-                otherKV.key    = keyMax;
-                const auto idx = j * warpSize + lid;
-                if (idx < numOfNN) {
-                  otherKV.value         = out_dists[rowId * numOfNN + idx];
-                  otherKV.key           = (uint32_t)out_inds[rowId * numOfNN + idx];
-                  const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-                  shDumpKV[shMemRowId * numOfNN + idx] = otherKV;
-                }
-              }
-            }
-          }
-          __threadfence();
-          __syncthreads();
-
-          if (threadIdx.x == 0) { atomicExch((int*)&mutexes[gridStrideY / Policy::Mblk], 0); }
-          __threadfence();
-
-        // Perform merging of otherKV with topk's across warp.
-#pragma unroll
-          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-            const auto rowId = starty + i * Policy::AccThRows;
-            if (rowId < m) {
-#pragma unroll
-              for (int j = 0; j < myWarpSelect::kNumWarpQRegisters; ++j) {
-                Pair otherKV;
-                otherKV.value  = identity;
-                otherKV.key    = keyMax;
-                const auto idx = j * warpSize + lid;
-                if (idx < numOfNN) {
-                  const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-                  otherKV               = shDumpKV[shMemRowId * numOfNN + idx];
-                }
-                heapArr[i]->add(otherKV.value, otherKV.key);
-              }
-            }
-          }
-          cta_processed++;
-        }
-#pragma unroll
-        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-          const auto rowId = starty + i * Policy::AccThRows;
-          if (rowId < m) {
-            bool needSort = (heapArr[i]->numVals > 0);
-            needSort      = __any_sync(0xffffffff, needSort);
-            if (needSort) { heapArr[i]->reduce(); }
-          }
-        }
-        storeWarpQGmem<Policy, Pair>(heapArr, out_dists, out_inds, m, numOfNN, starty);
-      } else {
-        if (threadIdx.x == 0) {
-          while (atomicCAS((int*)&mutexes[gridStrideY / Policy::Mblk], 0, 1) != 0)
-            ;
-        }
-        __threadfence();
-        __syncthreads();
-
-#pragma unroll
-        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-          const auto rowId = starty + i * Policy::AccThRows;
-          if (rowId < m) {
-            for (int idx = lid; idx < numOfNN; idx += warpSize) {
-              const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-              Pair KVPair           = shDumpKV[shMemRowId * numOfNN + idx];
-              out_dists[rowId * numOfNN + idx] = KVPair.value;
-              out_inds[rowId * numOfNN + idx]  = (IdxT)KVPair.key;
-            }
-          }
-        }
-        __threadfence();
-        __syncthreads();
-
-        if (threadIdx.x == 0) { atomicExch((int*)&mutexes[gridStrideY / Policy::Mblk], -2); }
-        __threadfence();
-      }
-    };
-
-  // epilogue operation lambda for final value calculation
-  auto epilog_lambda =
-    [&distance_op, numOfNN, m, n, ldd, out_dists, out_inds, keyMax, identity] __device__(
-      AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh],
-      DataT * regxn,
-      DataT * regyn,
-      IdxT gridStrideX,
-      IdxT gridStrideY) {
-      // Use ::template to disambiguate (See:
-      // https://en.cppreference.com/w/cpp/language/dependent_name)
-      int smem_offset = OpT::template shared_mem_size<Policy>();
-      Pair* shDumpKV  = (Pair*)(&smem[smem_offset]);
-
-      constexpr uint32_t mask = 0xffffffffu;
-      const IdxT starty       = gridStrideY + (threadIdx.x / Policy::AccThCols);
-      const IdxT startx       = gridStrideX + (threadIdx.x % Policy::AccThCols);
-      const int lid           = raft::laneId();
-
-      myWarpSelect heapArr1(identity, keyMax, numOfNN);
-      myWarpSelect heapArr2(identity, keyMax, numOfNN);
-      myWarpSelect* heapArr[] = {&heapArr1, &heapArr2};
-      if (usePrevTopKs) {
-        if (gridStrideX == blockIdx.x * Policy::Nblk) {
-          loadPrevTopKsGmemWarpQ<Policy, Pair>(heapArr, out_dists, out_inds, m, numOfNN, starty);
-        }
-      }
-
-      if (gridStrideX > blockIdx.x * Policy::Nblk) {
-#pragma unroll
-        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-          const auto rowId     = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-          Pair tempKV          = shDumpKV[(rowId * numOfNN) + numOfNN - 1];
-          heapArr[i]->warpKTop = tempKV.value;
-        }
-
-        // total vals can atmost be 256, (32*8)
-        int numValsWarpTopK[Policy::AccRowsPerTh];
-        int anyWarpTopKs = 0;
-#pragma unroll
-        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-          const auto rowId   = starty + i * Policy::AccThRows;
-          numValsWarpTopK[i] = 0;
-          if (rowId < m) {
-#pragma unroll
-            for (int j = 0; j < Policy::AccColsPerTh; ++j) {
-              const auto colId = startx + j * Policy::AccThCols;
-              if (colId < ldd) {
-                if (acc[i][j] < heapArr[i]->warpKTop) { numValsWarpTopK[i]++; }
-              }
-            }
-            anyWarpTopKs += numValsWarpTopK[i];
-          }
-        }
-        anyWarpTopKs = __syncthreads_or(anyWarpTopKs > 0);
-        if (anyWarpTopKs) {
-          Pair* allWarpTopKs = (Pair*)(&smem[0]);
-          uint32_t needScanSort[Policy::AccRowsPerTh];
-
-#pragma unroll
-          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-            const auto gmemRowId = starty + i * Policy::AccThRows;
-            needScanSort[i]      = 0;
-            if (gmemRowId < m) {
-              int myVals      = numValsWarpTopK[i];
-              needScanSort[i] = __ballot_sync(mask, myVals > 0);
-              if (needScanSort[i]) {
-#pragma unroll
-                for (unsigned int k = 1; k <= 16; k *= 2) {
-                  const unsigned int n = __shfl_up_sync(mask, numValsWarpTopK[i], k);
-                  if (lid >= k) { numValsWarpTopK[i] += n; }
-                }
-              }
-              // As each thread will know its total vals to write.
-              // we only store its starting location.
-              numValsWarpTopK[i] -= myVals;
-            }
-
-            if (needScanSort[i]) {
-              const auto rowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-              if (gmemRowId < m) {
-                if (needScanSort[i] & ((uint32_t)1 << lid)) {
-#pragma unroll
-                  for (int j = 0; j < Policy::AccColsPerTh; ++j) {
-                    const auto colId = startx + j * Policy::AccThCols;
-                    if (colId < ldd) {
-                      if (acc[i][j] < heapArr[i]->warpKTop) {
-                        Pair otherKV                                     = {colId, acc[i][j]};
-                        allWarpTopKs[rowId * (256) + numValsWarpTopK[i]] = otherKV;
-                        numValsWarpTopK[i]++;
-                      }
-                    }
-                  }
-                }
-                __syncwarp();
-                const int finalNumVals = raft::shfl(numValsWarpTopK[i], 31);
-                loadWarpQShmem<Policy, Pair>(heapArr[i], &shDumpKV[0], rowId, numOfNN);
-                updateSortedWarpQ<Pair, myWarpSelect::kNumWarpQRegisters>(
-                  heapArr[i], &allWarpTopKs[0], rowId, finalNumVals);
-              }
-            }
-          }
-          __syncthreads();
-#pragma unroll
-          for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-            if (needScanSort[i]) {
-              const auto rowId     = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-              const auto gmemRowId = starty + i * Policy::AccThRows;
-              if (gmemRowId < m) {
-                storeWarpQShmem<Policy, Pair>(heapArr[i], shDumpKV, rowId, numOfNN);
-              }
-            }
-          }
-        }
-      } else {
-#pragma unroll
-        for (int i = 0; i < Policy::AccRowsPerTh; ++i) {
-          const auto gmemRowId  = starty + i * Policy::AccThRows;
-          const auto shMemRowId = (threadIdx.x / Policy::AccThCols) + i * Policy::AccThRows;
-          if (gmemRowId < m) {
-#pragma unroll
-            for (int j = 0; j < Policy::AccColsPerTh; ++j) {
-              const auto colId = startx + j * Policy::AccThCols;
-              Pair otherKV     = {keyMax, identity};
-              if (colId < ldd) {
-                otherKV.value = acc[i][j];
-                otherKV.key   = colId;
-              }
-              heapArr[i]->add(otherKV.value, otherKV.key);
-            }
-
-            bool needSort = (heapArr[i]->numVals > 0);
-            needSort      = __any_sync(mask, needSort);
-            if (needSort) { heapArr[i]->reduce(); }
-            storeWarpQShmem<Policy, Pair>(heapArr[i], shDumpKV, shMemRowId, numOfNN);
-          }
-        }
-      }
-
-      if (((gridStrideX + Policy::Nblk * gridDim.x) >= n) && gridDim.x == 1) {
-        // This is last iteration of grid stride X
-        loadAllWarpQShmem<Policy, Pair>(heapArr, &shDumpKV[0], m, numOfNN);
-        storeWarpQGmem<Policy, Pair>(heapArr, out_dists, out_inds, m, numOfNN, starty);
-      }
-    };
-
-  constexpr bool write_out = false;
-  raft::distance::detail::PairwiseDistances<DataT,
-                                            OutT,
-                                            IdxT,
-                                            Policy,
-                                            OpT,
-                                            decltype(epilog_lambda),
-                                            FinalLambda,
-                                            decltype(rowEpilog_lambda),
-                                            isRowMajor,
-                                            write_out>
-    obj(x,
-        y,
-        m,
-        n,
-        k,
-        lda,
-        ldb,
-        ldd,
-        _xn,
-        _yn,
-        nullptr,  // output ptr, can be null as write_out == false.
-        smem,
-        distance_op,
-        epilog_lambda,
-        fin_op,
-        rowEpilog_lambda);
-  obj.run();
-}
-
-template <typename DataT,
-          typename AccT,
-          typename OutT,
-          typename IdxT,
-          int VecLen,
-          bool usePrevTopKs,
-          bool isRowMajor>
-void fusedL2UnexpKnnImpl(const DataT* x,
-                         const DataT* y,
-                         IdxT m,
-                         IdxT n,
-                         IdxT k,
-                         IdxT lda,
-                         IdxT ldb,
-                         IdxT ldd,
-                         bool sqrt,
-                         OutT* out_dists,
-                         IdxT* out_inds,
-                         IdxT numOfNN,
-                         cudaStream_t stream,
-                         void* workspace,
-                         size_t& worksize)
-{
-  typedef typename raft::linalg::Policy2x8<DataT, 1>::Policy RowPolicy;
-  typedef typename raft::linalg::Policy4x4<DataT, VecLen>::ColPolicy ColPolicy;
-
-  typedef typename std::conditional<true, RowPolicy, ColPolicy>::type KPolicy;
-
-  ASSERT(isRowMajor, "Only Row major inputs are allowed");
-
-  dim3 blk(KPolicy::Nthreads);
-  // Accumulation operation lambda
-  typedef cub::KeyValuePair<uint32_t, AccT> Pair;
-
-  raft::distance::detail::ops::l2_unexp_distance_op<DataT, AccT, IdxT> distance_op{sqrt};
-  raft::identity_op fin_op{};
-
-  if constexpr (isRowMajor) {
-    constexpr auto fusedL2UnexpKnn32RowMajor = fusedL2kNN<DataT,
-                                                          OutT,
-                                                          IdxT,
-                                                          KPolicy,
-                                                          decltype(distance_op),
-                                                          decltype(fin_op),
-                                                          32,
-                                                          2,
-                                                          usePrevTopKs,
-                                                          isRowMajor>;
-    constexpr auto fusedL2UnexpKnn64RowMajor = fusedL2kNN<DataT,
-                                                          OutT,
-                                                          IdxT,
-                                                          KPolicy,
-                                                          decltype(distance_op),
-                                                          decltype(fin_op),
-                                                          64,
-                                                          3,
-                                                          usePrevTopKs,
-                                                          isRowMajor>;
-
-    auto fusedL2UnexpKnnRowMajor = fusedL2UnexpKnn32RowMajor;
-    if (numOfNN <= 32) {
-      fusedL2UnexpKnnRowMajor = fusedL2UnexpKnn32RowMajor;
-    } else if (numOfNN <= 64) {
-      fusedL2UnexpKnnRowMajor = fusedL2UnexpKnn64RowMajor;
-    } else {
-      ASSERT(numOfNN <= 64, "fusedL2kNN: num of nearest neighbors must be <= 64");
-    }
-
-    const auto sharedMemSize =
-      distance_op.template shared_mem_size<KPolicy>() + KPolicy::Mblk * numOfNN * sizeof(Pair);
-
-    dim3 grid = raft::distance::detail::launchConfigGenerator<KPolicy>(
-      m, n, sharedMemSize, fusedL2UnexpKnnRowMajor);
-
-    if (grid.x > 1) {
-      const auto numMutexes = raft::ceildiv<int>(m, KPolicy::Mblk);
-      if (workspace == nullptr || worksize < (sizeof(int32_t) * numMutexes)) {
-        worksize = sizeof(int32_t) * numMutexes;
-        return;
-      } else {
-        RAFT_CUDA_TRY(cudaMemsetAsync(workspace, 0, sizeof(int32_t) * numMutexes, stream));
-      }
-    }
-
-    fusedL2UnexpKnnRowMajor<<<grid, blk, sharedMemSize, stream>>>(x,
-                                                                  y,
-                                                                  nullptr,
-                                                                  nullptr,
-                                                                  m,
-                                                                  n,
-                                                                  k,
-                                                                  lda,
-                                                                  ldb,
-                                                                  ldd,
-                                                                  distance_op,
-                                                                  fin_op,
-                                                                  (uint32_t)numOfNN,
-                                                                  (int*)workspace,
-                                                                  out_dists,
-                                                                  out_inds);
-  } else {
-  }
-
-  RAFT_CUDA_TRY(cudaGetLastError());
-}
-
-template <typename DataT,
-          typename AccT,
-          typename OutT,
-          typename IdxT,
-          bool usePrevTopKs,
-          bool isRowMajor>
-void fusedL2UnexpKnn(IdxT m,
-                     IdxT n,
-                     IdxT k,
-                     IdxT lda,
-                     IdxT ldb,
-                     IdxT ldd,
-                     const DataT* x,
-                     const DataT* y,
-                     bool sqrt,
-                     OutT* out_dists,
-                     IdxT* out_inds,
-                     IdxT numOfNN,
-                     cudaStream_t stream,
-                     void* workspace,
-                     size_t& worksize)
-{
-  size_t bytesA = sizeof(DataT) * lda;
-  size_t bytesB = sizeof(DataT) * ldb;
-  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
-    fusedL2UnexpKnnImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), usePrevTopKs, isRowMajor>(
-      x,
-      y,
-      m,
-      n,
-      k,
-      lda,
-      ldb,
-      ldd,
-      sqrt,
-      out_dists,
-      out_inds,
-      numOfNN,
-      stream,
-      workspace,
-      worksize);
-  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
-    fusedL2UnexpKnnImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), usePrevTopKs, isRowMajor>(
-      x,
-      y,
-      m,
-      n,
-      k,
-      lda,
-      ldb,
-      ldd,
-      sqrt,
-      out_dists,
-      out_inds,
-      numOfNN,
-      stream,
-      workspace,
-      worksize);
-  } else {
-    fusedL2UnexpKnnImpl<DataT, AccT, OutT, IdxT, 1, usePrevTopKs, isRowMajor>(x,
-                                                                              y,
-                                                                              m,
-                                                                              n,
-                                                                              k,
-                                                                              lda,
-                                                                              ldb,
-                                                                              ldd,
-                                                                              sqrt,
-                                                                              out_dists,
-                                                                              out_inds,
-                                                                              numOfNN,
-                                                                              stream,
-                                                                              workspace,
-                                                                              worksize);
-  }
-}
-
-template <typename DataT,
-          typename AccT,
-          typename OutT,
-          typename IdxT,
-          int VecLen,
-          bool usePrevTopKs,
-          bool isRowMajor>
-void fusedL2ExpKnnImpl(const DataT* x,
-                       const DataT* y,
-                       IdxT m,
-                       IdxT n,
-                       IdxT k,
-                       IdxT lda,
-                       IdxT ldb,
-                       IdxT ldd,
-                       bool sqrt,
-                       OutT* out_dists,
-                       IdxT* out_inds,
-                       IdxT numOfNN,
-                       cudaStream_t stream,
-                       void* workspace,
-                       size_t& worksize)
-{
-  typedef typename raft::linalg::Policy2x8<DataT, 1>::Policy RowPolicy;
-  typedef typename raft::linalg::Policy4x4<DataT, VecLen>::ColPolicy ColPolicy;
-
-  typedef typename std::conditional<true, RowPolicy, ColPolicy>::type KPolicy;
-
-  ASSERT(isRowMajor, "Only Row major inputs are allowed");
-
-  ASSERT(!(((x != y) && (worksize < (m + n) * sizeof(AccT))) || (worksize < m * sizeof(AccT))),
-         "workspace size error");
-  ASSERT(workspace != nullptr, "workspace is null");
-
-  dim3 blk(KPolicy::Nthreads);
-
-  typedef cub::KeyValuePair<uint32_t, AccT> Pair;
-
-  raft::distance::detail::ops::l2_exp_distance_op<DataT, AccT, IdxT> distance_op{sqrt};
-  raft::identity_op fin_op{};
-
-  if constexpr (isRowMajor) {
-    constexpr auto fusedL2ExpKnn32RowMajor = fusedL2kNN<DataT,
-                                                        OutT,
-                                                        IdxT,
-                                                        KPolicy,
-                                                        decltype(distance_op),
-                                                        decltype(fin_op),
-                                                        32,
-                                                        2,
-                                                        usePrevTopKs,
-                                                        isRowMajor>;
-    constexpr auto fusedL2ExpKnn64RowMajor = fusedL2kNN<DataT,
-                                                        OutT,
-                                                        IdxT,
-                                                        KPolicy,
-                                                        decltype(distance_op),
-                                                        decltype(fin_op),
-                                                        64,
-                                                        3,
-                                                        usePrevTopKs,
-                                                        isRowMajor>;
-
-    auto fusedL2ExpKnnRowMajor = fusedL2ExpKnn32RowMajor;
-    if (numOfNN <= 32) {
-      fusedL2ExpKnnRowMajor = fusedL2ExpKnn32RowMajor;
-    } else if (numOfNN <= 64) {
-      fusedL2ExpKnnRowMajor = fusedL2ExpKnn64RowMajor;
-    } else {
-      ASSERT(numOfNN <= 64, "fusedL2kNN: num of nearest neighbors must be <= 64");
-    }
-
-    const auto sharedMemSize =
-      distance_op.template shared_mem_size<KPolicy>() + (KPolicy::Mblk * numOfNN * sizeof(Pair));
-    dim3 grid = raft::distance::detail::launchConfigGenerator<KPolicy>(
-      m, n, sharedMemSize, fusedL2ExpKnnRowMajor);
-    int32_t* mutexes = nullptr;
-    if (grid.x > 1) {
-      const auto numMutexes   = raft::ceildiv<int>(m, KPolicy::Mblk);
-      const auto normsSize    = (x != y) ? (m + n) * sizeof(DataT) : n * sizeof(DataT);
-      const auto requiredSize = sizeof(int32_t) * numMutexes + normsSize;
-      if (worksize < requiredSize) {
-        worksize = requiredSize;
-        return;
-      } else {
-        mutexes = (int32_t*)((char*)workspace + normsSize);
-        RAFT_CUDA_TRY(cudaMemsetAsync(mutexes, 0, sizeof(int32_t) * numMutexes, stream));
-      }
-    }
-
-    DataT* xn = (DataT*)workspace;
-    DataT* yn = (DataT*)workspace;
-
-    if (x != y) {
-      yn += m;
-      raft::linalg::rowNorm(
-        xn, x, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{});
-      raft::linalg::rowNorm(
-        yn, y, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{});
-    } else {
-      raft::linalg::rowNorm(
-        xn, x, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{});
-    }
-    fusedL2ExpKnnRowMajor<<<grid, blk, sharedMemSize, stream>>>(x,
-                                                                y,
-                                                                xn,
-                                                                yn,
-                                                                m,
-                                                                n,
-                                                                k,
-                                                                lda,
-                                                                ldb,
-                                                                ldd,
-                                                                distance_op,
-                                                                fin_op,
-                                                                (uint32_t)numOfNN,
-                                                                mutexes,
-                                                                out_dists,
-                                                                out_inds);
-  } else {
-  }
-
-  RAFT_CUDA_TRY(cudaGetLastError());
-}
-
-template <typename DataT,
-          typename AccT,
-          typename OutT,
-          typename IdxT,
-          bool usePrevTopKs,
-          bool isRowMajor>
-void fusedL2ExpKnn(IdxT m,
-                   IdxT n,
-                   IdxT k,
-                   IdxT lda,
-                   IdxT ldb,
-                   IdxT ldd,
-                   const DataT* x,
-                   const DataT* y,
-                   bool sqrt,
-                   OutT* out_dists,
-                   IdxT* out_inds,
-                   IdxT numOfNN,
-                   cudaStream_t stream,
-                   void* workspace,
-                   size_t& worksize)
-{
-  size_t bytesA = sizeof(DataT) * lda;
-  size_t bytesB = sizeof(DataT) * ldb;
-  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
-    fusedL2ExpKnnImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), usePrevTopKs, isRowMajor>(
-      x,
-      y,
-      m,
-      n,
-      k,
-      lda,
-      ldb,
-      ldd,
-      sqrt,
-      out_dists,
-      out_inds,
-      numOfNN,
-      stream,
-      workspace,
-      worksize);
-  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
-    fusedL2ExpKnnImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), usePrevTopKs, isRowMajor>(
-      x,
-      y,
-      m,
-      n,
-      k,
-      lda,
-      ldb,
-      ldd,
-      sqrt,
-      out_dists,
-      out_inds,
-      numOfNN,
-      stream,
-      workspace,
-      worksize);
-  } else {
-    fusedL2ExpKnnImpl<DataT, AccT, OutT, IdxT, 1, usePrevTopKs, isRowMajor>(x,
-                                                                            y,
-                                                                            m,
-                                                                            n,
-                                                                            k,
-                                                                            lda,
-                                                                            ldb,
-                                                                            ldd,
-                                                                            sqrt,
-                                                                            out_dists,
-                                                                            out_inds,
-                                                                            numOfNN,
-                                                                            stream,
-                                                                            workspace,
-                                                                            worksize);
-  }
-}
-
-/**
- * Compute the k-nearest neighbors using L2 expanded/unexpanded distance.
-
- * @tparam value_idx
- * @tparam value_t
- * @param[out] out_inds output indices array on device (size n_query_rows * k)
- * @param[out] out_dists output dists array on device (size n_query_rows * k)
- * @param[in] index input index array on device (size n_index_rows * D)
- * @param[in] query input query array on device (size n_query_rows * D)
- * @param[in] n_index_rows number of rows in index array
- * @param[in] n_query_rows number of rows in query array
- * @param[in] k number of closest neighbors to return
- * @param[in] rowMajorIndex are the index arrays in row-major layout?
- * @param[in] rowMajorQuery are the query array in row-major layout?
- * @param[in] stream stream to order kernel launch
- */
-template <typename value_idx, typename value_t, bool usePrevTopKs = false>
-void fusedL2Knn(size_t D,
-                value_idx* out_inds,
-                value_t* out_dists,
-                const value_t* index,
-                const value_t* query,
-                size_t n_index_rows,
-                size_t n_query_rows,
-                int k,
-                bool rowMajorIndex,
-                bool rowMajorQuery,
-                cudaStream_t stream,
-                raft::distance::DistanceType metric)
-{
-  // Validate the input data
-  ASSERT(k > 0, "l2Knn: k must be > 0");
-  ASSERT(D > 0, "l2Knn: D must be > 0");
-  ASSERT(n_index_rows > 0, "l2Knn: n_index_rows must be > 0");
-  ASSERT(index, "l2Knn: index must be provided (passed null)");
-  ASSERT(n_query_rows > 0, "l2Knn: n_query_rows must be > 0");
-  ASSERT(query, "l2Knn: query must be provided (passed null)");
-  ASSERT(out_dists, "l2Knn: out_dists must be provided (passed null)");
-  ASSERT(out_inds, "l2Knn: out_inds must be provided (passed null)");
-  // Currently we only support same layout for x & y inputs.
-  ASSERT(rowMajorIndex == rowMajorQuery,
-         "l2Knn: rowMajorIndex and rowMajorQuery should have same layout");
-  // TODO: Add support for column major layout
-  ASSERT(rowMajorIndex == true, "l2Knn: only rowMajor inputs are supported for now.");
-
-  // Even for L2 Sqrt distance case we use non-sqrt version as FAISS bfKNN only support
-  // non-sqrt metric & some tests in RAFT/cuML (like Linkage) fails if we use L2 sqrt.
-  constexpr bool sqrt = false;
-
-  size_t worksize = 0, tempWorksize = 0;
-  rmm::device_uvector<char> workspace(worksize, stream);
-  value_idx lda = D, ldb = D, ldd = n_index_rows;
-
-  switch (metric) {
-    case raft::distance::DistanceType::L2SqrtExpanded:
-    case raft::distance::DistanceType::L2Expanded:
-      tempWorksize = raft::distance::detail::
-        getWorkspaceSize<raft::distance::DistanceType::L2Expanded, float, float, float, value_idx>(
-          query, index, n_query_rows, n_index_rows, D);
-      worksize = tempWorksize;
-      workspace.resize(worksize, stream);
-      fusedL2ExpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
-                                                                              n_index_rows,
-                                                                              D,
-                                                                              lda,
-                                                                              ldb,
-                                                                              ldd,
-                                                                              query,
-                                                                              index,
-                                                                              sqrt,
-                                                                              out_dists,
-                                                                              out_inds,
-                                                                              k,
-                                                                              stream,
-                                                                              workspace.data(),
-                                                                              worksize);
-      if (worksize > tempWorksize) {
-        workspace.resize(worksize, stream);
-        fusedL2ExpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
-                                                                                n_index_rows,
-                                                                                D,
-                                                                                lda,
-                                                                                ldb,
-                                                                                ldd,
-                                                                                query,
-                                                                                index,
-                                                                                sqrt,
-                                                                                out_dists,
-                                                                                out_inds,
-                                                                                k,
-                                                                                stream,
-                                                                                workspace.data(),
-                                                                                worksize);
-      }
-      break;
-    case raft::distance::DistanceType::L2Unexpanded:
-    case raft::distance::DistanceType::L2SqrtUnexpanded:
-      fusedL2UnexpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
-                                                                                n_index_rows,
-                                                                                D,
-                                                                                lda,
-                                                                                ldb,
-                                                                                ldd,
-                                                                                query,
-                                                                                index,
-                                                                                sqrt,
-                                                                                out_dists,
-                                                                                out_inds,
-                                                                                k,
-                                                                                stream,
-                                                                                workspace.data(),
-                                                                                worksize);
-      if (worksize) {
-        workspace.resize(worksize, stream);
-        fusedL2UnexpKnn<value_t, value_t, value_t, value_idx, usePrevTopKs, true>(n_query_rows,
-                                                                                  n_index_rows,
-                                                                                  D,
-                                                                                  lda,
-                                                                                  ldb,
-                                                                                  ldd,
-                                                                                  query,
-                                                                                  index,
-                                                                                  sqrt,
-                                                                                  out_dists,
-                                                                                  out_inds,
-                                                                                  k,
-                                                                                  stream,
-                                                                                  workspace.data(),
-                                                                                  worksize);
-      }
-      break;
-    default: printf("only L2 distance metric is supported\n"); break;
-  };
-}
-
-}  // namespace detail
-}  // namespace knn
-}  // namespace spatial
-}  // namespace raft
+#ifdef RAFT_COMPILED
+#include "fused_l2_knn-ext.cuh"
+#endif
diff --git a/cpp/include/raft/spatial/knn/specializations.cuh b/cpp/include/raft/spatial/knn/specializations.cuh
index 5f0a39a61b..ed0b6848ae 100644
--- a/cpp/include/raft/spatial/knn/specializations.cuh
+++ b/cpp/include/raft/spatial/knn/specializations.cuh
@@ -13,9 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/neighbors/specializations/ball_cover.cuh>
-#include <raft/neighbors/specializations/brute_force.cuh>
-#include <raft/neighbors/specializations/fused_l2_knn.cuh>
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/spatial/knn/specializations/knn.cuh b/cpp/include/raft/spatial/knn/specializations/knn.cuh
index e045487597..ed0b6848ae 100644
--- a/cpp/include/raft/spatial/knn/specializations/knn.cuh
+++ b/cpp/include/raft/spatial/knn/specializations/knn.cuh
@@ -13,31 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
-#include <raft/spatial/knn/knn.cuh>
-
-namespace raft::spatial::knn {
-#define RAFT_INST(IdxT, T, IntT)                                                            \
-  extern template void brute_force_knn<IdxT, T, IntT>(raft::device_resources const& handle, \
-                                                      std::vector<T*>& input,               \
-                                                      std::vector<IntT>& sizes,             \
-                                                      IntT D,                               \
-                                                      T* search_items,                      \
-                                                      IntT n,                               \
-                                                      IdxT* res_I,                          \
-                                                      T* res_D,                             \
-                                                      IntT k,                               \
-                                                      bool rowMajorIndex,                   \
-                                                      bool rowMajorQuery,                   \
-                                                      std::vector<IdxT>* translations,      \
-                                                      distance::DistanceType metric,        \
-                                                      float metric_arg);
-
-RAFT_INST(long, float, int);
-RAFT_INST(long, float, unsigned int);
-RAFT_INST(uint32_t, float, int);
-RAFT_INST(uint32_t, float, unsigned int);
-#undef RAFT_INST
-};  // namespace raft::spatial::knn
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/spectral/specializations.cuh b/cpp/include/raft/spectral/specializations.cuh
index 0ce5f0c653..9588a7f329 100644
--- a/cpp/include/raft/spectral/specializations.cuh
+++ b/cpp/include/raft/spectral/specializations.cuh
@@ -13,12 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef __SPECTRAL_SPECIALIZATIONS_H
-#define __SPECTRAL_SPECIALIZATIONS_H
-
 #pragma once
 
-#include <raft/distance/specializations.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#endif
\ No newline at end of file
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/stats/specializations.cuh b/cpp/include/raft/stats/specializations.cuh
index e6622469d3..9588a7f329 100644
--- a/cpp/include/raft/stats/specializations.cuh
+++ b/cpp/include/raft/stats/specializations.cuh
@@ -13,12 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef __STATS_SPECIALIZATIONS_H
-#define __STATS_SPECIALIZATIONS_H
-
 #pragma once
 
-#include <raft/distance/specializations.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#endif
\ No newline at end of file
+#pragma message(                                            \
+    __FILE__                                                \
+    " is deprecated and will be removed."                   \
+    " Including specializations is not necessary any more." \
+    " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html")
diff --git a/cpp/include/raft/util/cudart_utils.hpp b/cpp/include/raft/util/cudart_utils.hpp
index 1134513587..f3b083ac4a 100644
--- a/cpp/include/raft/util/cudart_utils.hpp
+++ b/cpp/include/raft/util/cudart_utils.hpp
@@ -18,10 +18,9 @@
 
 #include <raft/core/error.hpp>
 #include <raft/util/cuda_rt_essentials.hpp>
+#include <raft/util/memory_pool.hpp>
+
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/managed_memory_resource.hpp>
-#include <rmm/mr/device/per_device_resource.hpp>
-#include <rmm/mr/device/pool_memory_resource.hpp>
 
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
@@ -451,51 +450,4 @@ constexpr inline auto upper_bound<half>() -> half
   return static_cast<half>(__half_constexpr{0x7c00u});
 }
 
-/**
- * @brief Get a pointer to a pooled memory resource within the scope of the lifetime of the returned
- * unique pointer.
- *
- * This function is useful in the code where multiple repeated allocations/deallocations are
- * expected.
- * Use case example:
- * @code{.cpp}
- *   void my_func(..., size_t n, rmm::mr::device_memory_resource* mr = nullptr) {
- *     auto pool_guard = raft::get_pool_memory_resource(mr, 2 * n * sizeof(float));
- *     if (pool_guard){
- *       RAFT_LOG_INFO("Created a pool %zu bytes", pool_guard->pool_size());
- *     } else {
- *       RAFT_LOG_INFO("Using the current default or explicitly passed device memory resource");
- *     }
- *     rmm::device_uvector<float> x(n, stream, mr);
- *     rmm::device_uvector<float> y(n, stream, mr);
- *     ...
- *   }
- * @endcode
- * Here, the new memory resource would be created within the function scope if the passed `mr` is
- * null and the default resource is not a pool. After the call, `mr` contains a valid memory
- * resource in any case.
- *
- * @param[inout] mr if not null do nothing; otherwise get the current device resource and wrap it
- * into a `pool_memory_resource` if necessary and return the pointer to the result.
- * @param initial_size if a new memory pool is created, this would be its initial size (rounded up
- * to 256 bytes).
- *
- * @return if a new memory pool is created, it returns a unique_ptr to it;
- *   this managed pointer controls the lifetime of the created memory resource.
- */
-inline auto get_pool_memory_resource(rmm::mr::device_memory_resource*& mr, size_t initial_size)
-{
-  using pool_res_t = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;
-  std::unique_ptr<pool_res_t> pool_res{};
-  if (mr) return pool_res;
-  mr = rmm::mr::get_current_device_resource();
-  if (!dynamic_cast<pool_res_t*>(mr) &&
-      !dynamic_cast<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>*>(mr) &&
-      !dynamic_cast<rmm::mr::pool_memory_resource<rmm::mr::managed_memory_resource>*>(mr)) {
-    pool_res = std::make_unique<pool_res_t>(mr, (initial_size + 255) & (~255));
-    mr       = pool_res.get();
-  }
-  return pool_res;
-}
-
 }  // namespace raft
diff --git a/cpp/include/raft/util/detail/cub_wrappers.cuh b/cpp/include/raft/util/detail/cub_wrappers.cuh
index 8c70331165..0ce749d9c8 100644
--- a/cpp/include/raft/util/detail/cub_wrappers.cuh
+++ b/cpp/include/raft/util/detail/cub_wrappers.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ void sortPairs(rmm::device_uvector<char>& workspace,
                int len,
                cudaStream_t stream)
 {
-  size_t worksize;
+  size_t worksize = 0;  //  Fix 'worksize' may be used uninitialized in this function.
   cub::DeviceRadixSort::SortPairs(
     nullptr, worksize, inKeys, outKeys, inVals, outVals, len, 0, sizeof(KeyT) * 8, stream);
   workspace.resize(worksize, stream);
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_float_fast.cu b/cpp/include/raft/util/memory_pool-ext.hpp
similarity index 55%
rename from cpp/src/neighbors/specializations/detail/compute_similarity_float_float_fast.cu
rename to cpp/include/raft/util/memory_pool-ext.hpp
index 33c4e7ffc0..a02908346b 100644
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_float_fast.cu
+++ b/cpp/include/raft/util/memory_pool-ext.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
-#include <cuda_fp16.h>
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
+#pragma once
+#include <cstddef>                                   // size_t
+#include <memory>                                    // std::unique_ptr
+#include <rmm/mr/device/device_memory_resource.hpp>  // rmm::mr::device_memory_resource
 
-namespace raft::neighbors::ivf_pq::detail {
+namespace raft {
 
-template auto get_compute_similarity_kernel<float, float, true, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, float>;
+std::unique_ptr<rmm::mr::device_memory_resource> get_pool_memory_resource(
+  rmm::mr::device_memory_resource*& mr, size_t initial_size);
 
-}  // namespace raft::neighbors::ivf_pq::detail
+}  // namespace raft
diff --git a/cpp/include/raft/util/memory_pool-inl.hpp b/cpp/include/raft/util/memory_pool-inl.hpp
new file mode 100644
index 0000000000..a227b6e53f
--- /dev/null
+++ b/cpp/include/raft/util/memory_pool-inl.hpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <cstddef>
+#include <memory>
+
+#include <raft/core/detail/macros.hpp>  // RAFT_INLINE_CONDITIONAL
+#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
+
+namespace raft {
+
+/**
+ * @brief Get a pointer to a pooled memory resource within the scope of the lifetime of the returned
+ * unique pointer.
+ *
+ * This function is useful in the code where multiple repeated allocations/deallocations are
+ * expected.
+ * Use case example:
+ * @code{.cpp}
+ *   void my_func(..., size_t n, rmm::mr::device_memory_resource* mr = nullptr) {
+ *     auto pool_guard = raft::get_pool_memory_resource(mr, 2 * n * sizeof(float));
+ *     if (pool_guard){
+ *       RAFT_LOG_INFO("Created a new memory pool");
+ *     } else {
+ *       RAFT_LOG_INFO("Using the current default or explicitly passed device memory resource");
+ *     }
+ *     rmm::device_uvector<float> x(n, stream, mr);
+ *     rmm::device_uvector<float> y(n, stream, mr);
+ *     ...
+ *   }
+ * @endcode
+ * Here, the new memory resource would be created within the function scope if the passed `mr` is
+ * null and the default resource is not a pool. After the call, `mr` contains a valid memory
+ * resource in any case.
+ *
+ * @param[inout] mr if not null do nothing; otherwise get the current device resource and wrap it
+ * into a `pool_memory_resource` if necessary and return the pointer to the result.
+ * @param initial_size if a new memory pool is created, this would be its initial size (rounded up
+ * to 256 bytes).
+ *
+ * @return if a new memory pool is created, a unique_ptr (typed as the base resource) owning it,
+ *   otherwise null; this pointer controls the lifetime of the created memory resource.
+ */
+RAFT_INLINE_CONDITIONAL std::unique_ptr<rmm::mr::device_memory_resource> get_pool_memory_resource(
+  rmm::mr::device_memory_resource*& mr, size_t initial_size)
+{
+  using pool_res_t = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;
+  std::unique_ptr<pool_res_t> pool_res{};
+  if (mr) return pool_res;  // a resource was passed explicitly: keep it, return a null guard
+  mr = rmm::mr::get_current_device_resource();
+  if (!dynamic_cast<pool_res_t*>(mr) &&
+      !dynamic_cast<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>*>(mr) &&
+      !dynamic_cast<rmm::mr::pool_memory_resource<rmm::mr::managed_memory_resource>*>(mr)) {
+    pool_res = std::make_unique<pool_res_t>(mr, (initial_size + 255) & (~255));
+    mr       = pool_res.get();
+  }
+  return pool_res;
+}
+
+}  // namespace raft
diff --git a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu b/cpp/include/raft/util/memory_pool.hpp
similarity index 72%
rename from cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu
rename to cpp/include/raft/util/memory_pool.hpp
index 423613dcd1..c9d25ecb1f 100644
--- a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_float.cu
+++ b/cpp/include/raft/util/memory_pool.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,10 @@
  * limitations under the License.
  */
 
-#include <raft/distance/detail/kernels/kernel_matrices.cuh>
-#include <raft/distance/specializations.cuh>
+#pragma once
 
-template class raft::distance::kernels::detail::RBFKernel<float>;
\ No newline at end of file
+#include "memory_pool-ext.hpp"
+
+#if !defined(RAFT_COMPILED)
+#include "memory_pool-inl.hpp"
+#endif  // RAFT_COMPILED
diff --git a/cpp/include/raft/util/raft_explicit.hpp b/cpp/include/raft/util/raft_explicit.hpp
new file mode 100644
index 0000000000..77e6b57802
--- /dev/null
+++ b/cpp/include/raft/util/raft_explicit.hpp
@@ -0,0 +1,88 @@
+/* Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+/**
+ * @brief Prevents a function template from being implicitly instantiated
+ *
+ * This macro defines a function body that can be used for function template
+ * definitions of functions that should not be implicitly instantiated.
+ *
+ * When the template is erroneously implicitly instantiated, it provides a
+ * useful error message that tells the user how to avoid the implicit
+ * instantiation.
+ *
+ * The error message is generated using a static assert. It is generally tricky
+ * to have a static assert fire only when you want it, as documented in
+ * P2593: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2593r0.html
+ *
+ * We use the strategy from paragraph 1.3 here. We define a struct
+ * `not_allowed`, whose type is dependent on the template parameters of the
+ * enclosing function instance. We use this struct type to instantiate the
+ * `implicit_instantiation` template class, whose value is always false. We pass
+ * this value to static_assert. This way, the static assert only fires when the
+ * template is instantiated, since `implicit_instantiation` cannot be
+ * instantiated without all the types in the enclosing function template.
+ */
+#define RAFT_EXPLICIT                                                                          \
+  {                                                                                            \
+    /* Type of `not_allowed` depends on template parameters of enclosing function. */          \
+    struct not_allowed {};                                                                     \
+    static_assert(                                                                             \
+      raft::util::raft_explicit::implicit_instantiation<not_allowed>::value,                   \
+      "ACCIDENTAL_IMPLICIT_INSTANTIATION\n\n"                                                  \
+                                                                                               \
+      "If you see this error, then you have implicitly instantiated a function\n"              \
+      "template. To keep compile times in check, libraft has the policy of\n"                  \
+      "explicitly instantiating templates. To fix the compilation error, follow\n"             \
+      "these steps.\n\n"                                                                       \
+                                                                                               \
+      "If you scroll up or down a bit, you probably saw a line like the following:\n\n"        \
+                                                                                               \
+      "detected during instantiation of \"void raft::foo(T) [with T=float]\" at line [..]\n\n" \
+                                                                                               \
+      "Simplest temporary solution:\n\n"                                                       \
+                                                                                               \
+      "    Add '#undef RAFT_EXPLICIT_INSTANTIATE_ONLY' at the top of your .cpp/.cu file.\n\n"  \
+                                                                                               \
+      "Best solution:\n\n"                                                                     \
+                                                                                               \
+      "    1. Add the following line to the file include/raft/foo.hpp:\n\n"                    \
+                                                                                               \
+      "        extern template void raft::foo<float>(float);\n\n"                              \
+                                                                                               \
+      "    2. Add the following line to the file src/raft/foo.cpp:\n\n"                        \
+                                                                                               \
+      "        template void raft::foo<float>(float);\n");                                     \
+                                                                                               \
+    /* Function may have non-void return type. */                                              \
+    /* To prevent warnings/errors about missing returns, throw an exception. */                \
+    throw "raft_explicit_error";                                                               \
+  }
+
+namespace raft::util::raft_explicit {
+/**
+ * @brief Template that is always false
+ *
+ * This template is from paragraph 1.3 of P2593:
+ * https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2593r0.html
+ *
+ * The value of `value` is always false, but it depends on a template parameter.
+ */
+template <typename T>
+struct implicit_instantiation {
+  static constexpr bool value = false;
+};
+}  // namespace raft::util::raft_explicit
diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh
index a3535f8ffd..3d7a11e91e 100644
--- a/cpp/internal/raft_internal/matrix/select_k.cuh
+++ b/cpp/internal/raft_internal/matrix/select_k.cuh
@@ -16,16 +16,11 @@
 
 #pragma once
 
+#include <raft/core/device_resources.hpp>
 #include <raft/matrix/detail/select_radix.cuh>
 #include <raft/matrix/detail/select_warpsort.cuh>
 #include <raft/matrix/select_k.cuh>
 
-#ifdef RAFT_COMPILED
-#include <raft/matrix/specializations.cuh>
-#endif
-
-#include <raft/core/device_resources.hpp>
-
 namespace raft::matrix::select {
 
 struct params {
diff --git a/cpp/internal/raft_internal/neighbors/naive_knn.cuh b/cpp/internal/raft_internal/neighbors/naive_knn.cuh
index 47d6f068e3..3ad055272b 100644
--- a/cpp/internal/raft_internal/neighbors/naive_knn.cuh
+++ b/cpp/internal/raft_internal/neighbors/naive_knn.cuh
@@ -21,10 +21,6 @@
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 #include <raft/util/cuda_utils.cuh>
 
-#if defined RAFT_COMPILED
-#include <raft/matrix/specializations/detail/select_k.cuh>
-#endif
-
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
diff --git a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu b/cpp/src/core/logger.cpp
similarity index 71%
rename from cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu
rename to cpp/src/core/logger.cpp
index d777e73dc9..8f81cf2926 100644
--- a/cpp/src/distance/specializations/detail/kernels/gram_matrix_base_float.cu
+++ b/cpp/src/core/logger.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,8 +13,4 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#include <raft/distance/detail/kernels/gram_matrix.cuh>
-#include <raft/distance/specializations.cuh>
-
-template class raft::distance::kernels::detail::GramMatrixBase<float>;
\ No newline at end of file
+#include <raft/core/logger-inl.hpp>
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py
new file mode 100644
index 0000000000..97fe120458
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py
@@ -0,0 +1,194 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: this template is not perfectly formatted. Use pre-commit to get
+# everything in shape again.
+header = """/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp> // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>  // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh> // dispatch
+"""
+
+
+macro = """
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \\
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \\
+  template void raft::distance::detail::                                               \\
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \\
+      OpT<DataT, AccT, IdxT> distance_op,                                              \\
+      IdxT m,                                                                          \\
+      IdxT n,                                                                          \\
+      IdxT k,                                                                          \\
+      const DataT* x,                                                                  \\
+      const DataT* y,                                                                  \\
+      const DataT* x_norm,                                                             \\
+      const DataT* y_norm,                                                             \\
+      OutT* out,                                                                       \\
+      FinOpT fin_op,                                                                   \\
+      cudaStream_t stream,                                                             \\
+      bool is_row_major)
+"""
+
+data_type_instances = [
+    dict(
+        DataT="float",
+        AccT="float",
+        OutT="float",
+        IdxT="int",
+    ),
+    dict(
+        DataT="double",
+        AccT="double",
+        OutT="double",
+        IdxT="int",
+    ),
+]
+
+op_instances = [
+    dict(
+        path_prefix="canberra",
+        OpT="raft::distance::detail::ops::canberra_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="correlation",
+        OpT="raft::distance::detail::ops::correlation_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="cosine",
+        OpT="raft::distance::detail::ops::cosine_distance_op",
+        archs = [60, 80],
+    ),
+    dict(
+        path_prefix="hamming_unexpanded",
+        OpT="raft::distance::detail::ops::hamming_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="hellinger_expanded",
+        OpT="raft::distance::detail::ops::hellinger_distance_op",
+        archs = [60],
+    ),
+    # inner product is handled by cublas.
+    dict(
+        path_prefix="jensen_shannon",
+        OpT="raft::distance::detail::ops::jensen_shannon_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="kl_divergence",
+        OpT="raft::distance::detail::ops::kl_divergence_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="l1",
+        OpT="raft::distance::detail::ops::l1_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="l2_expanded",
+        OpT="raft::distance::detail::ops::l2_exp_distance_op",
+        archs = [60, 80],
+    ),
+    dict(
+        path_prefix="l2_unexpanded",
+        OpT="raft::distance::detail::ops::l2_unexp_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="l_inf",
+        OpT="raft::distance::detail::ops::l_inf_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="lp_unexpanded",
+        OpT="raft::distance::detail::ops::lp_unexp_distance_op",
+        archs = [60],
+    ),
+    dict(
+        path_prefix="russel_rao",
+        OpT="raft::distance::detail::ops::russel_rao_distance_op",
+        archs = [60],
+     ),
+]
+
+def arch_headers(archs):
+    """Return one dispatch_sm<arch>.cuh #include line per entry of `archs`, newline-joined."""
+    return "\n".join(
+        f"#include <raft/distance/detail/pairwise_matrix/dispatch_sm{sm}.cuh>"
+        for sm in archs
+    )
+
+
+# Emit one .cu file per (distance op, data type) pair, instantiated with the identity fin op.
+for op in op_instances:
+    for dt in data_type_instances:
+        DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"])
+        path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu"
+        with open(path, "w") as f:
+            f.write(header)
+            f.write(arch_headers(op["archs"]))
+            f.write(macro)
+
+            OpT = op['OpT']
+            FinOpT = "raft::identity_op"
+            f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n")
+            f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n")
+        print(f"src/distance/detail/pairwise_matrix/{path}")
+
+# Dispatch kernels with the RBF fin op: a single file covering every data type.
+with open("dispatch_rbf.cu", "w") as f:
+    OpT = "raft::distance::detail::ops::l2_unexp_distance_op"
+    archs = [60]
+
+    f.write(header)
+    f.write("#include <raft/distance/detail/kernels/rbf_fin_op.cuh> // rbf_fin_op\n")
+    f.write(arch_headers(archs))
+    f.write(macro)
+
+    for dt in data_type_instances:
+        DataT, AccT, OutT = (dt[k] for k in ["DataT", "AccT", "OutT"])
+        IdxT = "int64_t"  # the RBF instantiations use int64_t instead of dt["IdxT"]
+
+        FinOpT = f"raft::distance::kernels::detail::rbf_fin_op<{DataT}>"
+        f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n")
+
+    f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n")
+
+print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu")
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
new file mode 100644
index 0000000000..41db12e9ae
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::canberra_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
new file mode 100644
index 0000000000..f038e53381
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::canberra_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
new file mode 100644
index 0000000000..52e4cc02d8
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for correlation_distance_op: DataT/AccT/OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::correlation_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
new file mode 100644
index 0000000000..c9481d6c22
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for correlation_distance_op: DataT/AccT/OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::correlation_distance_op,
+  float,
+  float,
+  float,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
new file mode 100644
index 0000000000..517858125b
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for cosine_distance_op: DataT/AccT/OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::cosine_distance_op, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
new file mode 100644
index 0000000000..62f1d9874b
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for cosine_distance_op: DataT/AccT/OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::cosine_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
new file mode 100644
index 0000000000..500f7b4a9c
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for hamming_distance_op: DataT/AccT/OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hamming_distance_op, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
new file mode 100644
index 0000000000..3be7586b43
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for hamming_distance_op: DataT/AccT/OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hamming_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
new file mode 100644
index 0000000000..023134ddff
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for hellinger_distance_op: DataT/AccT/OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hellinger_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
new file mode 100644
index 0000000000..e438f121f2
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for hellinger_distance_op: DataT/AccT/OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::hellinger_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
new file mode 100644
index 0000000000..31c5003ad6
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for jensen_shannon_distance_op: DataT/AccT/OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::jensen_shannon_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
new file mode 100644
index 0000000000..e78c1c320a
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for jensen_shannon_distance_op: DataT/AccT/OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::jensen_shannon_distance_op,
+  float,
+  float,
+  float,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // done with helper macro
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
new file mode 100644
index 0000000000..5b95df9614
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for kl_divergence_op: DataT = AccT = OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::kl_divergence_op, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
new file mode 100644
index 0000000000..fb72c91b73
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for kl_divergence_op: DataT = AccT = OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::kl_divergence_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
new file mode 100644
index 0000000000..cac5acad92
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l1_distance_op: DataT = AccT = OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l1_distance_op, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
new file mode 100644
index 0000000000..78aa097961
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l1_distance_op: DataT = AccT = OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l1_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
new file mode 100644
index 0000000000..c8d922f6fa
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l2_exp_distance_op: DataT = AccT = OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_exp_distance_op, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
new file mode 100644
index 0000000000..20cf57f898
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l2_exp_distance_op: DataT = AccT = OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_exp_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
new file mode 100644
index 0000000000..eadd0d2c2b
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l2_unexp_distance_op: DataT = AccT = OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
new file mode 100644
index 0000000000..e4b5dd3a86
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l2_unexp_distance_op: DataT = AccT = OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
new file mode 100644
index 0000000000..45d021bce9
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l_inf_distance_op: DataT = AccT = OutT = double, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l_inf_distance_op, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
new file mode 100644
index 0000000000..ba48e52a18
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(                     \
+  OpT, DataT, AccT, OutT, FinOpT, IdxT)                                                \
+  template void raft::distance::detail::                                               \
+    pairwise_matrix_dispatch<OpT<DataT, AccT, IdxT>, DataT, AccT, OutT, FinOpT, IdxT>( \
+      OpT<DataT, AccT, IdxT> distance_op,                                              \
+      IdxT m,                                                                          \
+      IdxT n,                                                                          \
+      IdxT k,                                                                          \
+      const DataT* x,                                                                  \
+      const DataT* y,                                                                  \
+      const DataT* x_norm,                                                             \
+      const DataT* y_norm,                                                             \
+      OutT* out,                                                                       \
+      FinOpT fin_op,                                                                   \
+      cudaStream_t stream,                                                             \
+      bool is_row_major)
+// Explicit instantiation for l_inf_distance_op: DataT = AccT = OutT = float, IdxT = int.
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l_inf_distance_op, float, float, float, raft::identity_op, int);
+
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch  // helper macro no longer needed
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
new file mode 100644
index 0000000000..ffa58793d9
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+// Explicit instantiation: lp_unexp_distance_op, double data/acc/out, identity_op, int index.
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(            \
+  Op, Data, Acc, Out, FinOp, Idx)                                             \
+  template void raft::distance::detail::                                      \
+    pairwise_matrix_dispatch<Op<Data, Acc, Idx>, Data, Acc, Out, FinOp, Idx>( \
+      Op<Data, Acc, Idx> distance_op,                                         \
+      Idx m,                                                                  \
+      Idx n,                                                                  \
+      Idx k,                                                                  \
+      const Data* x,                                                          \
+      const Data* y,                                                          \
+      const Data* x_norm,                                                     \
+      const Data* y_norm,                                                     \
+      Out* out,                                                               \
+      FinOp fin_op,                                                           \
+      cudaStream_t stream,                                                    \
+      bool is_row_major)
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::lp_unexp_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
new file mode 100644
index 0000000000..915c68f05f
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+// Explicit instantiation: lp_unexp_distance_op, float data/acc/out, identity_op, int index.
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(            \
+  Op, Data, Acc, Out, FinOp, Idx)                                             \
+  template void raft::distance::detail::                                      \
+    pairwise_matrix_dispatch<Op<Data, Acc, Idx>, Data, Acc, Out, FinOp, Idx>( \
+      Op<Data, Acc, Idx> distance_op,                                         \
+      Idx m,                                                                  \
+      Idx n,                                                                  \
+      Idx k,                                                                  \
+      const Data* x,                                                          \
+      const Data* y,                                                          \
+      const Data* x_norm,                                                     \
+      const Data* y_norm,                                                     \
+      Out* out,                                                               \
+      FinOp fin_op,                                                           \
+      cudaStream_t stream,                                                    \
+      bool is_row_major)
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::lp_unexp_distance_op, float, float, float, raft::identity_op, int);
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu
new file mode 100644
index 0000000000..15855cea0a
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/kernels/rbf_fin_op.cuh>            // rbf_fin_op
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+// Explicit instantiations: l2_unexp_distance_op with rbf_fin_op, int64_t index, float and double.
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(            \
+  Op, Data, Acc, Out, FinOp, Idx)                                             \
+  template void raft::distance::detail::                                      \
+    pairwise_matrix_dispatch<Op<Data, Acc, Idx>, Data, Acc, Out, FinOp, Idx>( \
+      Op<Data, Acc, Idx> distance_op,                                         \
+      Idx m,                                                                  \
+      Idx n,                                                                  \
+      Idx k,                                                                  \
+      const Data* x,                                                          \
+      const Data* y,                                                          \
+      const Data* x_norm,                                                     \
+      const Data* y_norm,                                                     \
+      Out* out,                                                               \
+      FinOp fin_op,                                                           \
+      cudaStream_t stream,                                                    \
+      bool is_row_major)
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op,
+  float,
+  float,
+  float,
+  raft::distance::kernels::detail::rbf_fin_op<float>,
+  int64_t);
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::l2_unexp_distance_op,
+  double,
+  double,
+  double,
+  raft::distance::kernels::detail::rbf_fin_op<double>,
+  int64_t);
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
new file mode 100644
index 0000000000..db45dc8b94
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+// Explicit instantiation: russel_rao_distance_op, double data/acc/out, identity_op, int index.
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(            \
+  Op, Data, Acc, Out, FinOp, Idx)                                             \
+  template void raft::distance::detail::                                      \
+    pairwise_matrix_dispatch<Op<Data, Acc, Idx>, Data, Acc, Out, FinOp, Idx>( \
+      Op<Data, Acc, Idx> distance_op,                                         \
+      Idx m,                                                                  \
+      Idx n,                                                                  \
+      Idx k,                                                                  \
+      const Data* x,                                                          \
+      const Data* y,                                                          \
+      const Data* x_norm,                                                     \
+      const Data* y_norm,                                                     \
+      Out* out,                                                               \
+      FinOp fin_op,                                                           \
+      cudaStream_t stream,                                                    \
+      bool is_row_major)
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::russel_rao_distance_op,
+  double,
+  double,
+  double,
+  raft::identity_op,
+  int);
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
new file mode 100644
index 0000000000..a2a5a9fafe
--- /dev/null
+++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by dispatch_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python dispatch_00_generate.py
+ *
+ */
+
+#include <raft/core/operators.hpp>                                // raft::identity_op
+#include <raft/distance/detail/distance_ops/all_ops.cuh>          // ops::*
+#include <raft/distance/detail/pairwise_matrix/dispatch-inl.cuh>  // dispatch
+#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
+// Explicit instantiation: russel_rao_distance_op, float data/acc/out, identity_op, int index.
+#define instantiate_raft_distance_detail_pairwise_matrix_dispatch(            \
+  Op, Data, Acc, Out, FinOp, Idx)                                             \
+  template void raft::distance::detail::                                      \
+    pairwise_matrix_dispatch<Op<Data, Acc, Idx>, Data, Acc, Out, FinOp, Idx>( \
+      Op<Data, Acc, Idx> distance_op,                                         \
+      Idx m,                                                                  \
+      Idx n,                                                                  \
+      Idx k,                                                                  \
+      const Data* x,                                                          \
+      const Data* y,                                                          \
+      const Data* x_norm,                                                     \
+      const Data* y_norm,                                                     \
+      Out* out,                                                               \
+      FinOp fin_op,                                                           \
+      cudaStream_t stream,                                                    \
+      bool is_row_major)
+
+instantiate_raft_distance_detail_pairwise_matrix_dispatch(
+  raft::distance::detail::ops::russel_rao_distance_op, float, float, float, raft::identity_op, int);
+#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch
diff --git a/cpp/src/distance/distance.cu b/cpp/src/distance/distance.cu
new file mode 100644
index 0000000000..8c94608311
--- /dev/null
+++ b/cpp/src/distance/distance.cu
@@ -0,0 +1,934 @@
+/*
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/distance/detail/kernels/rbf_fin_op.cuh>  // rbf_fin_op
+#include <raft/distance/distance-inl.cuh>
+
+/*
+ * Hierarchy of instantiations:
+ *
+ * This file defines the template instantiations for the public API of
+ * raft::distance. To improve compile times, the compilation of the distance
+ * kernels is handled in distance/detail/pairwise_matrix/dispatch_*.cu.
+ * The macro that follows instantiates the overload taking fin_op, workspace and worksize.
+ */
+
+#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, FinalLambda, IdxT) \
+  template void raft::distance::distance<DT, DataT, AccT, OutT, FinalLambda, IdxT>(  \
+    raft::resources const& handle,                                                   \
+    const DataT* x,                                                                  \
+    const DataT* y,                                                                  \
+    OutT* dist,                                                                      \
+    IdxT m,                                                                          \
+    IdxT n,                                                                          \
+    IdxT k,                                                                          \
+    void* workspace,                                                                 \
+    size_t worksize,                                                                 \
+    FinalLambda fin_op,                                                              \
+    bool isRowMajor,                                                                 \
+    DataT metric_arg)
+
+// The following two instances (L2Unexpanded with rbf_fin_op as the final op) are
+// used in test/distance/gram.cu. Note the use of int64_t for the index type.
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::distance::kernels::detail::rbf_fin_op<float>,
+                                   int64_t);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::distance::kernels::detail::rbf_fin_op<double>,
+                                   int64_t);
+// All remaining instances use raft::identity_op and int indices, in float and double precision.
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::identity_op,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, raft::identity_op, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, raft::identity_op, int);
+
+#undef instantiate_raft_distance_distance
+
+// Overload without a final-op argument: no FinalLambda template parameter and no fin_op.
+#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT) \
+  template void raft::distance::distance<DT, DataT, AccT, OutT, IdxT>(  \
+    raft::resources const& handle,                                      \
+    const DataT* x,                                                     \
+    const DataT* y,                                                     \
+    OutT* dist,                                                         \
+    IdxT m,                                                             \
+    IdxT n,                                                             \
+    IdxT k,                                                             \
+    void* workspace,                                                    \
+    size_t worksize,                                                    \
+    bool isRowMajor,                                                    \
+    DataT metric_arg)
+// One float and one double instance per metric, all with int as the index type.
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, int);
+
+#undef instantiate_raft_distance_distance
+
+// Overload that takes neither fin_op nor the workspace / worksize arguments.
+#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, IdxT) \
+  template void raft::distance::distance<DT, DataT, AccT, OutT, IdxT>(  \
+    raft::resources const& handle,                                      \
+    const DataT* x,                                                     \
+    const DataT* y,                                                     \
+    OutT* dist,                                                         \
+    IdxT m,                                                             \
+    IdxT n,                                                             \
+    IdxT k,                                                             \
+    bool isRowMajor,                                                    \
+    DataT metric_arg)
+// One float and one double instance per metric, all with int as the index type.
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CorrelationExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L1, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, float, float, float, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::Linf, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, int);
+
+#undef instantiate_raft_distance_distance
+
+#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT)  \
+  template size_t raft::distance::getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT>( \
+    const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k)
+// Raw-pointer getWorkspaceSize overload: one instantiation per DistanceType for float and double.
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CorrelationExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CorrelationExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CosineExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::CosineExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HammingUnexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HammingUnexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HellingerExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::HellingerExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtExpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtUnexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2SqrtUnexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Linf, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Linf, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::LpUnexpanded, double, double, double, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::RusselRaoExpanded, float, float, float, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::RusselRaoExpanded, double, double, double, int);
+
+#undef instantiate_raft_distance_getWorkspaceSize
+
+#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT, layout)  \
+  template size_t raft::distance::getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT, layout>( \
+    raft::device_matrix_view<DataT, IdxT, layout> const& x,                                 \
+    raft::device_matrix_view<DataT, IdxT, layout> const& y)
+// getWorkspaceSize for device_matrix_view inputs: float/double data, c- and f-contiguous layouts.
+// We could consider not taking template parameters for this function. The
+// number of instantiations seems a bit excessive.
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, double, double, double, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::Canberra, double, double, double, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CorrelationExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::CosineExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HammingUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::HellingerExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::InnerProduct,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::InnerProduct, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::InnerProduct,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::JensenShannon,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::JensenShannon, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::JensenShannon,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::KLDivergence,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::KLDivergence, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::KLDivergence,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, double, double, double, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L1, double, double, double, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, float, float, float, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, double, double, double, int, raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtExpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           float,
+                                           float,
+                                           float,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_f_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(raft::distance::DistanceType::L2Unexpanded,
+                                           double,
+                                           double,
+                                           double,
+                                           int,
+                                           raft::layout_c_contiguous);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, int, raft::layout_f_contiguous);
+// NOTE(review): stops at L2Unexpanded/float/layout_f; Linf, LpUnexpanded, RusselRaoExpanded missing?
+#undef instantiate_raft_distance_getWorkspaceSize
+
+#define instantiate_raft_distance_pairwise_distance(value_t, index_t)                   \
+  template void raft::distance::pairwise_distance(raft::resources const& handle,        \
+                                                  value_t const* x,                     \
+                                                  value_t const* y,                     \
+                                                  value_t* dist,                        \
+                                                  index_t m,                            \
+                                                  index_t n,                            \
+                                                  index_t k,                            \
+                                                  rmm::device_uvector<char>& workspace, \
+                                                  raft::distance::DistanceType metric,  \
+                                                  bool isRowMajor,                      \
+                                                  value_t metric_arg)
+// Pointer-based pairwise_distance taking a caller-supplied workspace: float and double, int sizes.
+instantiate_raft_distance_pairwise_distance(float, int);
+instantiate_raft_distance_pairwise_distance(double, int);
+
+#undef instantiate_raft_distance_pairwise_distance
+
+// Pointer-based pairwise_distance overload that takes no workspace argument: float and double.
+#define instantiate_raft_distance_pairwise_distance(value_t, index_t)                  \
+  template void raft::distance::pairwise_distance(raft::resources const& handle,       \
+                                                  value_t const* x,                    \
+                                                  value_t const* y,                    \
+                                                  value_t* dist,                       \
+                                                  index_t m,                           \
+                                                  index_t n,                           \
+                                                  index_t k,                           \
+                                                  raft::distance::DistanceType metric, \
+                                                  bool isRowMajor,                     \
+                                                  value_t metric_arg)
+
+instantiate_raft_distance_pairwise_distance(float, int);
+instantiate_raft_distance_pairwise_distance(double, int);
+
+#undef instantiate_raft_distance_pairwise_distance
+
+// Version with mdspan
+#define instantiate_raft_distance_distance(DistT, DataT, AccT, OutT, layout, IdxT) \
+  template void raft::distance::distance<DistT, DataT, AccT, OutT, layout, IdxT>(  \
+    raft::resources const& handle,                                                 \
+    raft::device_matrix_view<DataT, IdxT, layout> const x,                         \
+    raft::device_matrix_view<DataT, IdxT, layout> const y,                         \
+    raft::device_matrix_view<OutT, IdxT, layout> dist,                             \
+    DataT metric_arg)
+
+// Again, we might want to consider reining in the number of instantiations...
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Canberra, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CorrelationExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::CosineExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HammingUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::HellingerExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::InnerProduct,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::InnerProduct, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::InnerProduct,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::JensenShannon,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::JensenShannon, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::JensenShannon,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::KLDivergence,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::KLDivergence, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::KLDivergence,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L1, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Expanded, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2SqrtUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::L2Unexpanded, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Unexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, double, double, double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::Linf, double, double, double, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::LpUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(
+  raft::distance::DistanceType::LpUnexpanded, float, float, float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::LpUnexpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_c_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   float,
+                                   float,
+                                   float,
+                                   raft::layout_f_contiguous,
+                                   int);
+instantiate_raft_distance_distance(raft::distance::DistanceType::RusselRaoExpanded,
+                                   double,
+                                   double,
+                                   double,
+                                   raft::layout_f_contiguous,
+                                   int);
+
+#undef instantiate_raft_distance_distance
+
+#define instantiate_raft_distance_pairwise_distance(DataT, layout, IdxT) \
+  template void raft::distance::pairwise_distance(                       \
+    raft::resources const& handle,                                       \
+    raft::device_matrix_view<DataT, IdxT, layout> const x,               \
+    raft::device_matrix_view<DataT, IdxT, layout> const y,               \
+    raft::device_matrix_view<DataT, IdxT, layout> dist,                  \
+    raft::distance::DistanceType metric,                                 \
+    DataT metric_arg)
+// Explicit instantiations: float and double, each in both contiguous layouts, int indices.
+instantiate_raft_distance_pairwise_distance(float, raft::layout_c_contiguous, int);
+instantiate_raft_distance_pairwise_distance(float, raft::layout_f_contiguous, int);
+instantiate_raft_distance_pairwise_distance(double, raft::layout_c_contiguous, int);
+instantiate_raft_distance_pairwise_distance(double, raft::layout_f_contiguous, int);
+
+#undef instantiate_raft_distance_pairwise_distance
diff --git a/cpp/src/distance/fused_l2_nn.cu b/cpp/src/distance/fused_l2_nn.cu
new file mode 100644
index 0000000000..6011aaec29
--- /dev/null
+++ b/cpp/src/distance/fused_l2_nn.cu
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>            // int64_t
+#include <raft/core/kvp.hpp>  // raft::KeyValuePair
+#include <raft/distance/fused_l2_nn-inl.cuh>
+
+#define instantiate_raft_distance_fusedL2NNMinReduce(DataT, OutT, IdxT)                   \
+  template void raft::distance::fusedL2NNMinReduce<DataT, OutT, IdxT>(OutT * min,         \
+                                                                      const DataT* x,     \
+                                                                      const DataT* y,     \
+                                                                      const DataT* xn,    \
+                                                                      const DataT* yn,    \
+                                                                      IdxT m,             \
+                                                                      IdxT n,             \
+                                                                      IdxT k,             \
+                                                                      void* workspace,    \
+                                                                      bool sqrt,          \
+                                                                      bool initOutBuffer, \
+                                                                      cudaStream_t stream)
+
+instantiate_raft_distance_fusedL2NNMinReduce(double, double, int);
+instantiate_raft_distance_fusedL2NNMinReduce(double, double, int64_t);
+instantiate_raft_distance_fusedL2NNMinReduce(float, float, int);
+instantiate_raft_distance_fusedL2NNMinReduce(float, float, int64_t);
+
+// A bare comma in KeyValuePair<K, V> would split the macro argument, so spell it as COMMA:
+#define COMMA ,
+
+instantiate_raft_distance_fusedL2NNMinReduce(double, raft::KeyValuePair<int COMMA double>, int);
+instantiate_raft_distance_fusedL2NNMinReduce(double,
+                                             raft::KeyValuePair<int64_t COMMA double>,
+                                             int64_t);
+instantiate_raft_distance_fusedL2NNMinReduce(float, raft::KeyValuePair<int COMMA float>, int);
+instantiate_raft_distance_fusedL2NNMinReduce(float,
+                                             raft::KeyValuePair<int64_t COMMA float>,
+                                             int64_t);
+
+#undef COMMA
+
+#undef instantiate_raft_distance_fusedL2NNMinReduce
diff --git a/cpp/src/distance/specializations/detail/00_write_template.py b/cpp/src/distance/specializations/detail/00_write_template.py
deleted file mode 100644
index 3f2f853569..0000000000
--- a/cpp/src/distance/specializations/detail/00_write_template.py
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/usr/bin/env python3
-
-# NOTE: this template is not perfectly formatted. Use pre-commit to get
-# everything in shape again.
-template = """/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp> // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh> // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh> // pairwise_matrix_instantiation_point
-INCLUDE_SM_HEADERS
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<OpT,
-                                                  IdxT,
-                                                  DataT,
-                                                  OutT,
-                                                  FinopT>(
-  OpT,
-  pairwise_matrix_params<IdxT, DataT, OutT, FinopT>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
-"""
-
-data_type_instances = [
-    dict(
-        DataT="float",
-        AccT="float",
-        OutT="float",
-        IdxT="int",
-    ),
-    dict(
-        DataT="double",
-        AccT="double",
-        OutT="double",
-        IdxT="int",
-    ),
-]
-
-op_instances = [
-    dict(
-        path_prefix="canberra",
-        OpT="ops::canberra_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="correlation",
-        OpT="ops::correlation_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="cosine",
-        OpT="ops::cosine_distance_op<DataT, AccT, IdxT>",
-        archs = [60, 80],
-    ),
-    dict(
-        path_prefix="hamming_unexpanded",
-        OpT="ops::hamming_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="hellinger_expanded",
-        OpT="ops::hellinger_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    # inner product is handled by cublas.
-    dict(
-        path_prefix="jensen_shannon",
-        OpT="ops::jensen_shannon_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="kl_divergence",
-        OpT="ops::kl_divergence_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="l1",
-        OpT="ops::l1_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="l2_expanded",
-        OpT="ops::l2_exp_distance_op<DataT, AccT, IdxT>",
-        archs = [60, 80],
-    ),
-    dict(
-        path_prefix="l2_unexpanded",
-        OpT="ops::l2_unexp_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="l_inf",
-        OpT="ops::l_inf_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="lp_unexpanded",
-        OpT="ops::lp_unexp_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-    ),
-    dict(
-        path_prefix="russel_rao",
-        OpT="ops::russel_rao_distance_op<DataT, AccT, IdxT>",
-        archs = [60],
-     ),
-]
-
-def fill_in(s, template):
-    for k, v in template.items():
-        s = s.replace(k, v)
-    return s
-
-def fill_include_sm_headers(op_instance):
-    include_headers ="\n".join([
-        f"#include <raft/distance/detail/pairwise_matrix/dispatch_sm{arch}.cuh>"
-        for arch in op_instance["archs"]
-    ])
-
-    return {
-        "path_prefix": op_instance["path_prefix"],
-        "OpT": op_instance["OpT"],
-        "INCLUDE_SM_HEADERS": include_headers
-    }
-
-for op_instance in op_instances:
-    op_instance = fill_include_sm_headers(op_instance)
-
-    for data_type_instance in data_type_instances:
-        op_data_instance = {
-            k : fill_in(v, data_type_instance)
-            for k, v in op_instance.items()
-        }
-        instance = {
-            **op_data_instance,
-            **data_type_instance,
-            "FinopT": "decltype(raft::identity_op())",
-        }
-
-        text = fill_in(template, instance)
-
-        path = fill_in("path_prefix_DataT_AccT_OutT_IdxT.cu", instance)
-        with open(path, "w") as f:
-            f.write(text)
diff --git a/cpp/src/distance/specializations/detail/canberra_double_double_double_int.cu b/cpp/src/distance/specializations/detail/canberra_double_double_double_int.cu
deleted file mode 100644
index 037d218178..0000000000
--- a/cpp/src/distance/specializations/detail/canberra_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::canberra_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::canberra_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/canberra_float_float_float_int.cu b/cpp/src/distance/specializations/detail/canberra_float_float_float_int.cu
deleted file mode 100644
index 0ed8ea7bb0..0000000000
--- a/cpp/src/distance/specializations/detail/canberra_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::canberra_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::canberra_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/correlation_double_double_double_int.cu b/cpp/src/distance/specializations/detail/correlation_double_double_double_int.cu
deleted file mode 100644
index 0c11f0621e..0000000000
--- a/cpp/src/distance/specializations/detail/correlation_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::correlation_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::correlation_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/correlation_float_float_float_int.cu b/cpp/src/distance/specializations/detail/correlation_float_float_float_int.cu
deleted file mode 100644
index 396e158554..0000000000
--- a/cpp/src/distance/specializations/detail/correlation_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::correlation_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::correlation_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/cosine_double_double_double_int.cu b/cpp/src/distance/specializations/detail/cosine_double_double_double_int.cu
deleted file mode 100644
index e9afb6f563..0000000000
--- a/cpp/src/distance/specializations/detail/cosine_double_double_double_int.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::cosine_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::cosine_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/cosine_float_float_float_int.cu b/cpp/src/distance/specializations/detail/cosine_float_float_float_int.cu
deleted file mode 100644
index 1033c491d6..0000000000
--- a/cpp/src/distance/specializations/detail/cosine_float_float_float_int.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::cosine_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::cosine_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/hamming_unexpanded_double_double_double_int.cu b/cpp/src/distance/specializations/detail/hamming_unexpanded_double_double_double_int.cu
deleted file mode 100644
index 195115914d..0000000000
--- a/cpp/src/distance/specializations/detail/hamming_unexpanded_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::hamming_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::hamming_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/hamming_unexpanded_float_float_float_int.cu b/cpp/src/distance/specializations/detail/hamming_unexpanded_float_float_float_int.cu
deleted file mode 100644
index a74c6c404e..0000000000
--- a/cpp/src/distance/specializations/detail/hamming_unexpanded_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::hamming_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::hamming_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/hellinger_expanded_double_double_double_int.cu b/cpp/src/distance/specializations/detail/hellinger_expanded_double_double_double_int.cu
deleted file mode 100644
index bac1dd7bd0..0000000000
--- a/cpp/src/distance/specializations/detail/hellinger_expanded_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::hellinger_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::hellinger_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/hellinger_expanded_float_float_float_int.cu b/cpp/src/distance/specializations/detail/hellinger_expanded_float_float_float_int.cu
deleted file mode 100644
index 77c113b1a9..0000000000
--- a/cpp/src/distance/specializations/detail/hellinger_expanded_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::hellinger_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::hellinger_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/inner_product_double_double_double_int.cu b/cpp/src/distance/specializations/detail/inner_product_double_double_double_int.cu
deleted file mode 100644
index 3db0a3572e..0000000000
--- a/cpp/src/distance/specializations/detail/inner_product_double_double_double_int.cu
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/distance/detail/distance.cuh>
-#include <raft/distance/specializations.cuh>
-
-namespace raft {
-namespace distance {
-namespace detail {
-template void distance<raft::distance::DistanceType::InnerProduct, double, double, double, int>(
-  raft::resources const& handle,
-  const double* x,
-  const double* y,
-  double* dist,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  std::size_t worksize,
-  bool isRowMajor,
-  double metric_arg);
-
-}  // namespace detail
-}  // namespace distance
-}  // namespace raft
diff --git a/cpp/src/distance/specializations/detail/inner_product_float_float_float_int.cu b/cpp/src/distance/specializations/detail/inner_product_float_float_float_int.cu
deleted file mode 100644
index 2b06ca4dc2..0000000000
--- a/cpp/src/distance/specializations/detail/inner_product_float_float_float_int.cu
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/distance/detail/distance.cuh>
-#include <raft/distance/specializations.cuh>
-
-namespace raft {
-namespace distance {
-namespace detail {
-template void distance<raft::distance::DistanceType::InnerProduct, float, float, float, int>(
-  raft::resources const& handle,
-  const float* x,
-  const float* y,
-  float* dist,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  std::size_t worksize,
-  bool isRowMajor,
-  float metric_arg);
-}  // namespace detail
-}  // namespace distance
-}  // namespace raft
diff --git a/cpp/src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu b/cpp/src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu
deleted file mode 100644
index 188e52c152..0000000000
--- a/cpp/src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void
-  pairwise_matrix_instantiation_point<ops::jensen_shannon_distance_op<double, double, int>,
-                                      int,
-                                      double,
-                                      double,
-                                      decltype(raft::identity_op())>(
-    ops::jensen_shannon_distance_op<double, double, int>,
-    pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-    cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu b/cpp/src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu
deleted file mode 100644
index b0afbf7bb2..0000000000
--- a/cpp/src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void
-  pairwise_matrix_instantiation_point<ops::jensen_shannon_distance_op<float, float, int>,
-                                      int,
-                                      float,
-                                      float,
-                                      decltype(raft::identity_op())>(
-    ops::jensen_shannon_distance_op<float, float, int>,
-    pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-    cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu b/cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu
deleted file mode 100644
index ab818db73b..0000000000
--- a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_double.cu
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/distance/detail/kernels/kernel_matrices.cuh>
-#include <raft/distance/specializations.cuh>
-
-template class raft::distance::kernels::detail::TanhKernel<double>;
\ No newline at end of file
diff --git a/cpp/src/distance/specializations/detail/kl_divergence_double_double_double_int.cu b/cpp/src/distance/specializations/detail/kl_divergence_double_double_double_int.cu
deleted file mode 100644
index f06ae85414..0000000000
--- a/cpp/src/distance/specializations/detail/kl_divergence_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::kl_divergence_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::kl_divergence_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/kl_divergence_float_float_float_int.cu b/cpp/src/distance/specializations/detail/kl_divergence_float_float_float_int.cu
deleted file mode 100644
index 00d5a5ee5b..0000000000
--- a/cpp/src/distance/specializations/detail/kl_divergence_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::kl_divergence_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::kl_divergence_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l1_double_double_double_int.cu b/cpp/src/distance/specializations/detail/l1_double_double_double_int.cu
deleted file mode 100644
index 5c235316da..0000000000
--- a/cpp/src/distance/specializations/detail/l1_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l1_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::l1_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l1_float_float_float_int.cu b/cpp/src/distance/specializations/detail/l1_float_float_float_int.cu
deleted file mode 100644
index fb293ca83d..0000000000
--- a/cpp/src/distance/specializations/detail/l1_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l1_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::l1_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l2_expanded_double_double_double_int.cu b/cpp/src/distance/specializations/detail/l2_expanded_double_double_double_int.cu
deleted file mode 100644
index 2c02f0224f..0000000000
--- a/cpp/src/distance/specializations/detail/l2_expanded_double_double_double_int.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l2_exp_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::l2_exp_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l2_expanded_float_float_float_int.cu b/cpp/src/distance/specializations/detail/l2_expanded_float_float_float_int.cu
deleted file mode 100644
index 85e25a25ca..0000000000
--- a/cpp/src/distance/specializations/detail/l2_expanded_float_float_float_int.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm80.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l2_exp_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::l2_exp_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l2_unexpanded_double_double_double_int.cu b/cpp/src/distance/specializations/detail/l2_unexpanded_double_double_double_int.cu
deleted file mode 100644
index 5b4d995d14..0000000000
--- a/cpp/src/distance/specializations/detail/l2_unexpanded_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l2_unexp_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::l2_unexp_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l2_unexpanded_float_float_float_int.cu b/cpp/src/distance/specializations/detail/l2_unexpanded_float_float_float_int.cu
deleted file mode 100644
index a63c3f0bb8..0000000000
--- a/cpp/src/distance/specializations/detail/l2_unexpanded_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l2_unexp_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::l2_unexp_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l_inf_double_double_double_int.cu b/cpp/src/distance/specializations/detail/l_inf_double_double_double_int.cu
deleted file mode 100644
index 831167523f..0000000000
--- a/cpp/src/distance/specializations/detail/l_inf_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l_inf_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::l_inf_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/l_inf_float_float_float_int.cu b/cpp/src/distance/specializations/detail/l_inf_float_float_float_int.cu
deleted file mode 100644
index 02e667cbe3..0000000000
--- a/cpp/src/distance/specializations/detail/l_inf_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::l_inf_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::l_inf_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/lp_unexpanded_double_double_double_int.cu b/cpp/src/distance/specializations/detail/lp_unexpanded_double_double_double_int.cu
deleted file mode 100644
index ebd71065ec..0000000000
--- a/cpp/src/distance/specializations/detail/lp_unexpanded_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::lp_unexp_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::lp_unexp_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/lp_unexpanded_float_float_float_int.cu b/cpp/src/distance/specializations/detail/lp_unexpanded_float_float_float_int.cu
deleted file mode 100644
index b94a81fdce..0000000000
--- a/cpp/src/distance/specializations/detail/lp_unexpanded_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::lp_unexp_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::lp_unexp_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/russel_rao_double_double_double_int.cu b/cpp/src/distance/specializations/detail/russel_rao_double_double_double_int.cu
deleted file mode 100644
index 6f952fcc37..0000000000
--- a/cpp/src/distance/specializations/detail/russel_rao_double_double_double_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::russel_rao_distance_op<double, double, int>,
-                                                  int,
-                                                  double,
-                                                  double,
-                                                  decltype(raft::identity_op())>(
-  ops::russel_rao_distance_op<double, double, int>,
-  pairwise_matrix_params<int, double, double, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/detail/russel_rao_float_float_float_int.cu b/cpp/src/distance/specializations/detail/russel_rao_float_float_float_int.cu
deleted file mode 100644
index 3223ce33a7..0000000000
--- a/cpp/src/distance/specializations/detail/russel_rao_float_float_float_int.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/operators.hpp>                            // raft::identity_op
-#include <raft/distance/detail/distance_ops/all_ops.cuh>      // ops::*
-#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>  // pairwise_matrix_instantiation_point
-#include <raft/distance/detail/pairwise_matrix/dispatch_sm60.cuh>
-
-namespace raft::distance::detail {
-
-template void pairwise_matrix_instantiation_point<ops::russel_rao_distance_op<float, float, int>,
-                                                  int,
-                                                  float,
-                                                  float,
-                                                  decltype(raft::identity_op())>(
-  ops::russel_rao_distance_op<float, float, int>,
-  pairwise_matrix_params<int, float, float, decltype(raft::identity_op())>,
-  cudaStream_t);
-
-}  // namespace raft::distance::detail
diff --git a/cpp/src/distance/specializations/fused_l2_nn_double_int.cu b/cpp/src/distance/specializations/fused_l2_nn_double_int.cu
deleted file mode 100644
index b49132b042..0000000000
--- a/cpp/src/distance/specializations/fused_l2_nn_double_int.cu
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/kvp.hpp>
-#include <raft/distance/fused_l2_nn.cuh>
-#include <raft/distance/specializations.cuh>
-
-namespace raft {
-namespace distance {
-
-template void fusedL2NNMinReduce<double, raft::KeyValuePair<int, double>, int>(
-  raft::KeyValuePair<int, double>* min,
-  const double* x,
-  const double* y,
-  const double* xn,
-  const double* yn,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-template void fusedL2NNMinReduce<double, double, int>(double* min,
-                                                      const double* x,
-                                                      const double* y,
-                                                      const double* xn,
-                                                      const double* yn,
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      void* workspace,
-                                                      bool sqrt,
-                                                      bool initOutBuffer,
-                                                      cudaStream_t stream);
-
-}  // namespace distance
-}  // namespace raft
diff --git a/cpp/src/distance/specializations/fused_l2_nn_double_int64.cu b/cpp/src/distance/specializations/fused_l2_nn_double_int64.cu
deleted file mode 100644
index b1e3a900a9..0000000000
--- a/cpp/src/distance/specializations/fused_l2_nn_double_int64.cu
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/kvp.hpp>
-#include <raft/distance/fused_l2_nn.cuh>
-#include <raft/distance/specializations.cuh>
-
-namespace raft {
-namespace distance {
-
-template void fusedL2NNMinReduce<double, raft::KeyValuePair<int64_t, double>, int64_t>(
-  raft::KeyValuePair<int64_t, double>* min,
-  const double* x,
-  const double* y,
-  const double* xn,
-  const double* yn,
-  int64_t m,
-  int64_t n,
-  int64_t k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-template void fusedL2NNMinReduce<double, double, int64_t>(double* min,
-                                                          const double* x,
-                                                          const double* y,
-                                                          const double* xn,
-                                                          const double* yn,
-                                                          int64_t m,
-                                                          int64_t n,
-                                                          int64_t k,
-                                                          void* workspace,
-                                                          bool sqrt,
-                                                          bool initOutBuffer,
-                                                          cudaStream_t stream);
-
-}  // namespace distance
-}  // namespace raft
diff --git a/cpp/src/distance/specializations/fused_l2_nn_float_int.cu b/cpp/src/distance/specializations/fused_l2_nn_float_int.cu
deleted file mode 100644
index 44b4953d8c..0000000000
--- a/cpp/src/distance/specializations/fused_l2_nn_float_int.cu
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/kvp.hpp>
-#include <raft/distance/fused_l2_nn.cuh>
-#include <raft/distance/specializations.cuh>
-
-namespace raft {
-namespace distance {
-
-template void fusedL2NNMinReduce<float, raft::KeyValuePair<int, float>, int>(
-  raft::KeyValuePair<int, float>* min,
-  const float* x,
-  const float* y,
-  const float* xn,
-  const float* yn,
-  int m,
-  int n,
-  int k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-template void fusedL2NNMinReduce<float, float, int>(float* min,
-                                                    const float* x,
-                                                    const float* y,
-                                                    const float* xn,
-                                                    const float* yn,
-                                                    int m,
-                                                    int n,
-                                                    int k,
-                                                    void* workspace,
-                                                    bool sqrt,
-                                                    bool initOutBuffer,
-                                                    cudaStream_t stream);
-
-}  // namespace distance
-}  // namespace raft
diff --git a/cpp/src/distance/specializations/fused_l2_nn_float_int64.cu b/cpp/src/distance/specializations/fused_l2_nn_float_int64.cu
deleted file mode 100644
index 9ca2b639a9..0000000000
--- a/cpp/src/distance/specializations/fused_l2_nn_float_int64.cu
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/core/kvp.hpp>
-#include <raft/distance/fused_l2_nn.cuh>
-#include <raft/distance/specializations.cuh>
-
-namespace raft {
-namespace distance {
-
-template void fusedL2NNMinReduce<float, raft::KeyValuePair<int64_t, float>, int64_t>(
-  raft::KeyValuePair<int64_t, float>* min,
-  const float* x,
-  const float* y,
-  const float* xn,
-  const float* yn,
-  int64_t m,
-  int64_t n,
-  int64_t k,
-  void* workspace,
-  bool sqrt,
-  bool initOutBuffer,
-  cudaStream_t stream);
-template void fusedL2NNMinReduce<float, float, int64_t>(float* min,
-                                                        const float* x,
-                                                        const float* y,
-                                                        const float* xn,
-                                                        const float* yn,
-                                                        int64_t m,
-                                                        int64_t n,
-                                                        int64_t k,
-                                                        void* workspace,
-                                                        bool sqrt,
-                                                        bool initOutBuffer,
-                                                        cudaStream_t stream);
-
-}  // namespace distance
-}  // namespace raft
diff --git a/cpp/src/linalg/detail/coalesced_reduction.cu b/cpp/src/linalg/detail/coalesced_reduction.cu
new file mode 100644
index 0000000000..00d025df46
--- /dev/null
+++ b/cpp/src/linalg/detail/coalesced_reduction.cu
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// #include <raft/linalg/detail/coalesced_reduction-ext.cuh>
+
+#include <raft/linalg/detail/coalesced_reduction-inl.cuh>
+// Explicit template instantiations of raft::linalg::detail::coalescedReduction.
+#define instantiate_raft_linalg_detail_coalescedReduction(                       \
+  InType, OutType, IdxType, MainLambda, ReduceLambda, FinalLambda)               \
+  template void raft::linalg::detail::coalescedReduction(OutType* dots,          \
+                                                         const InType* data,     \
+                                                         IdxType D,              \
+                                                         IdxType N,              \
+                                                         OutType init,           \
+                                                         cudaStream_t stream,    \
+                                                         bool inplace,           \
+                                                         MainLambda main_op,     \
+                                                         ReduceLambda reduce_op, \
+                                                         FinalLambda final_op)
+// Each use below passes (InType, OutType, IdxType, MainLambda, ReduceLambda, FinalLambda).
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::identity_op, raft::min_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::sq_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::abs_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  double, double, int, raft::abs_op, raft::max_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::abs_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::abs_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::identity_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::identity_op, raft::min_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, int, raft::sq_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, long, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::identity_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::sq_op, raft::add_op, raft::identity_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::abs_op, raft::max_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, size_t, raft::sq_op, raft::add_op, raft::sqrt_op);
+instantiate_raft_linalg_detail_coalescedReduction(
+  float, float, unsigned int, raft::sq_op, raft::add_op, raft::identity_op);
+
+#undef instantiate_raft_linalg_detail_coalescedReduction
diff --git a/cpp/src/matrix/detail/select_k_double_int64_t.cu b/cpp/src/matrix/detail/select_k_double_int64_t.cu
new file mode 100644
index 0000000000..022627283a
--- /dev/null
+++ b/cpp/src/matrix/detail/select_k_double_int64_t.cu
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/matrix/detail/select_k-inl.cuh>
+// Explicit instantiation of raft::matrix::detail::select_k for T = double, IdxT = int64_t.
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                     \
+  template void raft::matrix::detail::select_k(const T* in_val,              \
+                                               const IdxT* in_idx,           \
+                                               size_t batch_size,            \
+                                               size_t len,                   \
+                                               int k,                        \
+                                               T* out_val,                   \
+                                               IdxT* out_idx,                \
+                                               bool select_min,              \
+                                               rmm::cuda_stream_view stream, \
+                                               rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(double, int64_t);
+
+#undef instantiate_raft_matrix_detail_select_k
diff --git a/cpp/src/matrix/detail/select_k_double_uint32_t.cu b/cpp/src/matrix/detail/select_k_double_uint32_t.cu
new file mode 100644
index 0000000000..22c6989337
--- /dev/null
+++ b/cpp/src/matrix/detail/select_k_double_uint32_t.cu
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>  // uint32_t
+#include <raft/matrix/detail/select_k-inl.cuh>
+// Explicit instantiation of raft::matrix::detail::select_k for T = double, IdxT = uint32_t.
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                     \
+  template void raft::matrix::detail::select_k(const T* in_val,              \
+                                               const IdxT* in_idx,           \
+                                               size_t batch_size,            \
+                                               size_t len,                   \
+                                               int k,                        \
+                                               T* out_val,                   \
+                                               IdxT* out_idx,                \
+                                               bool select_min,              \
+                                               rmm::cuda_stream_view stream, \
+                                               rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(double, uint32_t);
+
+#undef instantiate_raft_matrix_detail_select_k
diff --git a/cpp/src/matrix/detail/select_k_float_int64_t.cu b/cpp/src/matrix/detail/select_k_float_int64_t.cu
new file mode 100644
index 0000000000..1f1d686048
--- /dev/null
+++ b/cpp/src/matrix/detail/select_k_float_int64_t.cu
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/matrix/detail/select_k-inl.cuh>
+// Explicit instantiation of raft::matrix::detail::select_k for T = float, IdxT = int64_t.
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                     \
+  template void raft::matrix::detail::select_k(const T* in_val,              \
+                                               const IdxT* in_idx,           \
+                                               size_t batch_size,            \
+                                               size_t len,                   \
+                                               int k,                        \
+                                               T* out_val,                   \
+                                               IdxT* out_idx,                \
+                                               bool select_min,              \
+                                               rmm::cuda_stream_view stream, \
+                                               rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(float, int64_t);
+
+#undef instantiate_raft_matrix_detail_select_k
diff --git a/cpp/src/matrix/detail/select_k_float_uint32_t.cu b/cpp/src/matrix/detail/select_k_float_uint32_t.cu
new file mode 100644
index 0000000000..3bb47acbf2
--- /dev/null
+++ b/cpp/src/matrix/detail/select_k_float_uint32_t.cu
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/matrix/detail/select_k-inl.cuh>
+// Explicit instantiation of raft::matrix::detail::select_k for T = float, IdxT = uint32_t.
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                     \
+  template void raft::matrix::detail::select_k(const T* in_val,              \
+                                               const IdxT* in_idx,           \
+                                               size_t batch_size,            \
+                                               size_t len,                   \
+                                               int k,                        \
+                                               T* out_val,                   \
+                                               IdxT* out_idx,                \
+                                               bool select_min,              \
+                                               rmm::cuda_stream_view stream, \
+                                               rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(float, uint32_t);
+
+#undef instantiate_raft_matrix_detail_select_k
diff --git a/cpp/src/matrix/detail/select_k_half_int64_t.cu b/cpp/src/matrix/detail/select_k_half_int64_t.cu
new file mode 100644
index 0000000000..cf4e15959d
--- /dev/null
+++ b/cpp/src/matrix/detail/select_k_half_int64_t.cu
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/matrix/detail/select_k-inl.cuh>
+// Explicit instantiation of raft::matrix::detail::select_k for T = __half, IdxT = int64_t.
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                     \
+  template void raft::matrix::detail::select_k(const T* in_val,              \
+                                               const IdxT* in_idx,           \
+                                               size_t batch_size,            \
+                                               size_t len,                   \
+                                               int k,                        \
+                                               T* out_val,                   \
+                                               IdxT* out_idx,                \
+                                               bool select_min,              \
+                                               rmm::cuda_stream_view stream, \
+                                               rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(__half, int64_t);
+
+#undef instantiate_raft_matrix_detail_select_k
diff --git a/cpp/src/matrix/detail/select_k_half_uint32_t.cu b/cpp/src/matrix/detail/select_k_half_uint32_t.cu
new file mode 100644
index 0000000000..b18887bfc0
--- /dev/null
+++ b/cpp/src/matrix/detail/select_k_half_uint32_t.cu
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/matrix/detail/select_k-inl.cuh>
+// Explicit instantiation of raft::matrix::detail::select_k for T = __half, IdxT = uint32_t.
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                     \
+  template void raft::matrix::detail::select_k(const T* in_val,              \
+                                               const IdxT* in_idx,           \
+                                               size_t batch_size,            \
+                                               size_t len,                   \
+                                               int k,                        \
+                                               T* out_val,                   \
+                                               IdxT* out_idx,                \
+                                               bool select_min,              \
+                                               rmm::cuda_stream_view stream, \
+                                               rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(__half, uint32_t);
+
+#undef instantiate_raft_matrix_detail_select_k
diff --git a/cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu b/cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu
deleted file mode 100644
index 370ab1ba50..0000000000
--- a/cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/matrix/detail/select_k.cuh>
-#include <raft/matrix/specializations.cuh>
-
-namespace raft::matrix::detail {
-
-#define RAFT_INST(T, IdxT)                               \
-  template void select_k<T, IdxT>(const T*,              \
-                                  const IdxT*,           \
-                                  size_t,                \
-                                  size_t,                \
-                                  int,                   \
-                                  T*,                    \
-                                  IdxT*,                 \
-                                  bool,                  \
-                                  rmm::cuda_stream_view, \
-                                  rmm::mr::device_memory_resource*);
-
-RAFT_INST(float, int64_t);
-
-}  // namespace raft::matrix::detail
diff --git a/cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu b/cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu
deleted file mode 100644
index c6733c2a46..0000000000
--- a/cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/matrix/detail/select_k.cuh>
-#include <raft/matrix/specializations.cuh>
-
-namespace raft::matrix::detail {
-
-#define RAFT_INST(T, IdxT)                               \
-  template void select_k<T, IdxT>(const T*,              \
-                                  const IdxT*,           \
-                                  size_t,                \
-                                  size_t,                \
-                                  int,                   \
-                                  T*,                    \
-                                  IdxT*,                 \
-                                  bool,                  \
-                                  rmm::cuda_stream_view, \
-                                  rmm::mr::device_memory_resource*);
-
-RAFT_INST(float, uint32_t);
-
-}  // namespace raft::matrix::detail
diff --git a/cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu b/cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu
deleted file mode 100644
index 38e28ac54d..0000000000
--- a/cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/matrix/detail/select_k.cuh>
-#include <raft/matrix/specializations.cuh>
-
-namespace raft::matrix::detail {
-
-#define RAFT_INST(T, IdxT)                               \
-  template void select_k<T, IdxT>(const T*,              \
-                                  const IdxT*,           \
-                                  size_t,                \
-                                  size_t,                \
-                                  int,                   \
-                                  T*,                    \
-                                  IdxT*,                 \
-                                  bool,                  \
-                                  rmm::cuda_stream_view, \
-                                  rmm::mr::device_memory_resource*);
-
-RAFT_INST(half, int64_t);
-
-}  // namespace raft::matrix::detail
diff --git a/cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu b/cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu
deleted file mode 100644
index 108bd30b49..0000000000
--- a/cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/matrix/detail/select_k.cuh>
-#include <raft/matrix/specializations.cuh>
-
-namespace raft::matrix::detail {
-
-#define RAFT_INST(T, IdxT)                               \
-  template void select_k<T, IdxT>(const T*,              \
-                                  const IdxT*,           \
-                                  size_t,                \
-                                  size_t,                \
-                                  int,                   \
-                                  T*,                    \
-                                  IdxT*,                 \
-                                  bool,                  \
-                                  rmm::cuda_stream_view, \
-                                  rmm::mr::device_memory_resource*);
-
-RAFT_INST(half, uint32_t);
-
-}  // namespace raft::matrix::detail
diff --git a/cpp/src/neighbors/ball_cover.cu b/cpp/src/neighbors/ball_cover.cu
new file mode 100644
index 0000000000..4c49c1847b
--- /dev/null
+++ b/cpp/src/neighbors/ball_cover.cu
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>
+#include <raft/neighbors/ball_cover-inl.cuh>
+// Explicitly instantiates build_index, all_knn_query and knn_query for one BallCoverIndex type.
+#define instantiate_raft_neighbors_ball_cover(idx_t, value_t, int_t, matrix_idx_t)                 \
+  template void raft::neighbors::ball_cover::build_index<idx_t, value_t, int_t, matrix_idx_t>(     \
+    raft::device_resources const& handle,                                                          \
+    raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index);      \
+                                                                                                   \
+  template void raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(   \
+    raft::device_resources const& handle,                                                          \
+    raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
+    int_t k,                                                                                       \
+    idx_t* inds,                                                                                   \
+    value_t* dists,                                                                                \
+    bool perform_post_filtering,                                                                   \
+    float weight);                                                                                 \
+                                                                                                   \
+  template void raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(   \
+    raft::device_resources const& handle,                                                          \
+    raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
+    raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
+    raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,                              \
+    int_t k,                                                                                       \
+    bool perform_post_filtering,                                                                   \
+    float weight);                                                                                 \
+                                                                                                   \
+  template void raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t>(                     \
+    raft::device_resources const& handle,                                                          \
+    const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t>& index,               \
+    int_t k,                                                                                       \
+    const value_t* query,                                                                          \
+    int_t n_query_pts,                                                                             \
+    idx_t* inds,                                                                                   \
+    value_t* dists,                                                                                \
+    bool perform_post_filtering,                                                                   \
+    float weight);                                                                                 \
+                                                                                                   \
+  template void raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t, matrix_idx_t>(       \
+    raft::device_resources const& handle,                                                          \
+    const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index, \
+    raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,                        \
+    raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
+    raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,                              \
+    int_t k,                                                                                       \
+    bool perform_post_filtering,                                                                   \
+    float weight);
+// Instantiate for idx_t=int64_t, value_t=float, int_t=uint32_t, matrix_idx_t=uint32_t.
+instantiate_raft_neighbors_ball_cover(int64_t, float, uint32_t, uint32_t);
+// Undefine the helper so it does not leak past this point.
+#undef instantiate_raft_neighbors_ball_cover
diff --git a/cpp/src/neighbors/brute_force_00_generate.py b/cpp/src/neighbors/brute_force_00_generate.py
new file mode 100644
index 0000000000..251dd53b1c
--- /dev/null
+++ b/cpp/src/neighbors/brute_force_00_generate.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Generates brute_force_*.cu explicit-instantiation sources in the current working directory.
+header = """
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by brute_force_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python brute_force_00_generate.py
+ *
+ */
+
+#include <cstdint>
+#include <raft/neighbors/brute_force-inl.cuh>
+
+"""
+# C++ macro text that explicitly instantiates raft::neighbors::brute_force::knn.
+knn_macro = """
+#define instantiate_raft_neighbors_brute_force_knn(idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op) \\
+    template void raft::neighbors::brute_force::knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>( \\
+        raft::device_resources const& handle,                           \\
+        std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \\
+        raft::device_matrix_view<const value_t, matrix_idx, search_layout> search, \\
+        raft::device_matrix_view<idx_t, matrix_idx, row_major> indices, \\
+        raft::device_matrix_view<value_t, matrix_idx, row_major> distances, \\
+        raft::distance::DistanceType metric,                            \\
+        std::optional<float> metric_arg,                                \\
+        std::optional<idx_t> global_id_offset,                          \\
+        epilogue_op distance_epilogue);
+
+"""
+# C++ macro text that explicitly instantiates raft::neighbors::brute_force::fused_l2_knn.
+fused_l2_knn_macro = """
+#define instantiate_raft_neighbors_brute_force_fused_l2_knn(value_t, idx_t, idx_layout, query_layout) \\
+    template void raft::neighbors::brute_force::fused_l2_knn(    \\
+        raft::device_resources const& handle,                           \\
+        raft::device_matrix_view<const value_t, idx_t, idx_layout> index, \\
+        raft::device_matrix_view<const value_t, idx_t, query_layout> query, \\
+        raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,     \\
+        raft::device_matrix_view<value_t, idx_t, row_major> out_dists,  \\
+        raft::distance::DistanceType metric);
+
+"""
+# Output-file suffix -> (idx_t, value_t, matrix_idx) for each knn instantiation.
+knn_types = dict(
+    int64_t_float_uint32_t=("int64_t","float","uint32_t"),
+    int64_t_float_int64_t=("int64_t","float","int64_t"),
+    int_float_int=("int","float","int"),
+    uint32_t_float_uint32_t=("uint32_t","float","uint32_t"),
+)
+# Output-file suffix -> (value_t, idx_t) for each fused_l2_knn instantiation.
+fused_l2_knn_types = dict(
+    float_int64_t=("float", "int64_t"),
+)
+# Each output is opened with mode "w", so an existing file of the same name is overwritten.
+# knn: one brute_force_knn_<suffix>.cu per knn_types entry (row_major layouts, identity_op).
+for type_path, (idx_t, value_t, matrix_idx) in knn_types.items():
+    path = f"brute_force_knn_{type_path}.cu"
+    with open(path, "w") as f:
+        f.write(header)
+        f.write(knn_macro)
+        f.write(f"instantiate_raft_neighbors_brute_force_knn({idx_t},{value_t},{matrix_idx},raft::row_major,raft::row_major,raft::identity_op);\n\n")
+        f.write("#undef instantiate_raft_neighbors_brute_force_knn\n")
+
+    # For pasting into CMakeLists.txt
+    print(f"src/neighbors/{path}")
+
+# fused_l2_knn: one brute_force_fused_l2_knn_<suffix>.cu per fused_l2_knn_types entry.
+for type_path, (value_t, idx_t) in fused_l2_knn_types.items():
+    path = f"brute_force_fused_l2_knn_{type_path}.cu"
+    with open(path, "w") as f:
+        f.write(header)
+        f.write(fused_l2_knn_macro)
+        f.write(f"instantiate_raft_neighbors_brute_force_fused_l2_knn({value_t},{idx_t},raft::row_major,raft::row_major);\n\n")
+        f.write("#undef instantiate_raft_neighbors_brute_force_fused_l2_knn\n")
+
+    # For pasting into CMakeLists.txt
+    print(f"src/neighbors/{path}")
diff --git a/cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu b/cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
new file mode 100644
index 0000000000..4e1805f9a8
--- /dev/null
+++ b/cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
@@ -0,0 +1,45 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by brute_force_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python brute_force_00_generate.py
+ *
+ */
+
+#include <cstdint>
+#include <raft/neighbors/brute_force-inl.cuh>
+
+#define instantiate_raft_neighbors_brute_force_fused_l2_knn(            \
+  value_t, idx_t, idx_layout, query_layout)                             \
+  template void raft::neighbors::brute_force::fused_l2_knn(             \
+    raft::device_resources const& handle,                               \
+    raft::device_matrix_view<const value_t, idx_t, idx_layout> index,   \
+    raft::device_matrix_view<const value_t, idx_t, query_layout> query, \
+    raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,         \
+    raft::device_matrix_view<value_t, idx_t, row_major> out_dists,      \
+    raft::distance::DistanceType metric);
+
+instantiate_raft_neighbors_brute_force_fused_l2_knn(float,
+                                                    int64_t,
+                                                    raft::row_major,
+                                                    raft::row_major);
+
+#undef instantiate_raft_neighbors_brute_force_fused_l2_knn
diff --git a/cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu b/cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
new file mode 100644
index 0000000000..a668b076d6
--- /dev/null
+++ b/cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
@@ -0,0 +1,47 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by brute_force_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python brute_force_00_generate.py
+ *
+ */
+
+#include <cstdint>
+#include <raft/neighbors/brute_force-inl.cuh>
+
+#define instantiate_raft_neighbors_brute_force_knn(                                         \
+  idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
+  template void raft::neighbors::brute_force::                                              \
+    knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
+      raft::device_resources const& handle,                                                 \
+      std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
+      raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
+      raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
+      raft::device_matrix_view<value_t, matrix_idx, row_major> distances,                   \
+      raft::distance::DistanceType metric,                                                  \
+      std::optional<float> metric_arg,                                                      \
+      std::optional<idx_t> global_id_offset,                                                \
+      epilogue_op distance_epilogue);
+
+instantiate_raft_neighbors_brute_force_knn(
+  int64_t, float, int64_t, raft::row_major, raft::row_major, raft::identity_op);
+
+#undef instantiate_raft_neighbors_brute_force_knn
diff --git a/cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu b/cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
new file mode 100644
index 0000000000..21cac5034a
--- /dev/null
+++ b/cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
@@ -0,0 +1,47 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by brute_force_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python brute_force_00_generate.py
+ *
+ */
+
+#include <cstdint>
+#include <raft/neighbors/brute_force-inl.cuh>
+
+#define instantiate_raft_neighbors_brute_force_knn(                                         \
+  idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
+  template void raft::neighbors::brute_force::                                              \
+    knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
+      raft::device_resources const& handle,                                                 \
+      std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
+      raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
+      raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
+      raft::device_matrix_view<value_t, matrix_idx, row_major> distances,                   \
+      raft::distance::DistanceType metric,                                                  \
+      std::optional<float> metric_arg,                                                      \
+      std::optional<idx_t> global_id_offset,                                                \
+      epilogue_op distance_epilogue);
+
+instantiate_raft_neighbors_brute_force_knn(
+  int64_t, float, uint32_t, raft::row_major, raft::row_major, raft::identity_op);
+
+#undef instantiate_raft_neighbors_brute_force_knn
diff --git a/cpp/src/neighbors/brute_force_knn_int_float_int.cu b/cpp/src/neighbors/brute_force_knn_int_float_int.cu
new file mode 100644
index 0000000000..b76fe09c2a
--- /dev/null
+++ b/cpp/src/neighbors/brute_force_knn_int_float_int.cu
@@ -0,0 +1,47 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by brute_force_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python brute_force_00_generate.py
+ *
+ */
+
+#include <cstdint>
+#include <raft/neighbors/brute_force-inl.cuh>
+
+#define instantiate_raft_neighbors_brute_force_knn(                                         \
+  idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
+  template void raft::neighbors::brute_force::                                              \
+    knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
+      raft::device_resources const& handle,                                                 \
+      std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
+      raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
+      raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
+      raft::device_matrix_view<value_t, matrix_idx, row_major> distances,                   \
+      raft::distance::DistanceType metric,                                                  \
+      std::optional<float> metric_arg,                                                      \
+      std::optional<idx_t> global_id_offset,                                                \
+      epilogue_op distance_epilogue);
+
+instantiate_raft_neighbors_brute_force_knn(
+  int, float, int, raft::row_major, raft::row_major, raft::identity_op);
+
+#undef instantiate_raft_neighbors_brute_force_knn
diff --git a/cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu b/cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
new file mode 100644
index 0000000000..4d3f627182
--- /dev/null
+++ b/cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
@@ -0,0 +1,47 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by brute_force_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python brute_force_00_generate.py
+ *
+ */
+
+#include <cstdint>
+#include <raft/neighbors/brute_force-inl.cuh>
+
+#define instantiate_raft_neighbors_brute_force_knn(                                         \
+  idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
+  template void raft::neighbors::brute_force::                                              \
+    knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
+      raft::device_resources const& handle,                                                 \
+      std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
+      raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
+      raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
+      raft::device_matrix_view<value_t, matrix_idx, row_major> distances,                   \
+      raft::distance::DistanceType metric,                                                  \
+      std::optional<float> metric_arg,                                                      \
+      std::optional<idx_t> global_id_offset,                                                \
+      epilogue_op distance_epilogue);
+
+instantiate_raft_neighbors_brute_force_knn(
+  uint32_t, float, uint32_t, raft::row_major, raft::row_major, raft::identity_op);
+
+#undef instantiate_raft_neighbors_brute_force_knn
diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
new file mode 100644
index 0000000000..4dfa2a707c
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh>
+// Explicit instantiation of ivfflat_interleaved_scan for a single (T, AccT, IdxT) combination.
+#define instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(T, AccT, IdxT)  \
+  template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan<T, AccT, IdxT>( \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,                                 \
+    const T* queries,                                                                       \
+    const uint32_t* coarse_query_results,                                                   \
+    const uint32_t n_queries,                                                               \
+    const raft::distance::DistanceType metric,                                              \
+    const uint32_t n_probes,                                                                \
+    const uint32_t k,                                                                       \
+    const bool select_min,                                                                  \
+    IdxT* neighbors,                                                                        \
+    float* distances,                                                                       \
+    uint32_t& grid_dim_x,                                                                   \
+    rmm::cuda_stream_view stream)
+// Instantiate for T=float, AccT=float, IdxT=int64_t; the macro itself omits the ';'.
+instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(float, float, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan
diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
new file mode 100644
index 0000000000..2d54248e4d
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh>
+// Explicit instantiation of ivfflat_interleaved_scan for a single (T, AccT, IdxT) combination.
+#define instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(T, AccT, IdxT)  \
+  template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan<T, AccT, IdxT>( \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,                                 \
+    const T* queries,                                                                       \
+    const uint32_t* coarse_query_results,                                                   \
+    const uint32_t n_queries,                                                               \
+    const raft::distance::DistanceType metric,                                              \
+    const uint32_t n_probes,                                                                \
+    const uint32_t k,                                                                       \
+    const bool select_min,                                                                  \
+    IdxT* neighbors,                                                                        \
+    float* distances,                                                                       \
+    uint32_t& grid_dim_x,                                                                   \
+    rmm::cuda_stream_view stream)
+// Instantiate for T=int8_t, AccT=int32_t, IdxT=int64_t; the macro itself omits the ';'.
+instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(int8_t, int32_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan
diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
new file mode 100644
index 0000000000..75fe52f3c7
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh>
+// Explicit instantiation of ivfflat_interleaved_scan for a single (T, AccT, IdxT) combination.
+#define instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(T, AccT, IdxT)  \
+  template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan<T, AccT, IdxT>( \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,                                 \
+    const T* queries,                                                                       \
+    const uint32_t* coarse_query_results,                                                   \
+    const uint32_t n_queries,                                                               \
+    const raft::distance::DistanceType metric,                                              \
+    const uint32_t n_probes,                                                                \
+    const uint32_t k,                                                                       \
+    const bool select_min,                                                                  \
+    IdxT* neighbors,                                                                        \
+    float* distances,                                                                       \
+    uint32_t& grid_dim_x,                                                                   \
+    rmm::cuda_stream_view stream)
+// Instantiate for T=uint8_t, AccT=uint32_t, IdxT=int64_t; the macro itself omits the ';'.
+instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan(uint8_t, uint32_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan
diff --git a/cpp/src/neighbors/detail/ivf_flat_search.cu b/cpp/src/neighbors/detail/ivf_flat_search.cu
new file mode 100644
index 0000000000..345a8f499d
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_flat_search.cu
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/detail/ivf_flat_search-inl.cuh>
+
+// Explicit instantiations of raft::neighbors::ivf_flat::detail::search for the
+// (T, IdxT) pairs listed below. The macro is expanded at global scope, so every
+// RAFT type in its parameter list is written fully qualified.
+#define instantiate_raft_neighbors_ivf_flat_detail_search(T, IdxT)  \
+  template void raft::neighbors::ivf_flat::detail::search<T, IdxT>( \
+    raft::device_resources const& handle,                           \
+    const raft::neighbors::ivf_flat::search_params& params,         \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,         \
+    const T* queries,                                               \
+    uint32_t n_queries,                                             \
+    uint32_t k,                                                     \
+    IdxT* neighbors,                                                \
+    float* distances,                                               \
+    rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_neighbors_ivf_flat_detail_search(float, int64_t);
+instantiate_raft_neighbors_ivf_flat_detail_search(int8_t, int64_t);
+instantiate_raft_neighbors_ivf_flat_detail_search(uint8_t, int64_t);
+
+#undef instantiate_raft_neighbors_ivf_flat_detail_search
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py
new file mode 100644
index 0000000000..a740d01bd2
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Generate the ivf_pq_compute_similarity_*.cu explicit-instantiation sources.
+
+One .cu file is written per entry of `types`, into the directory that contains
+this script, and each file's path relative to cpp/ is printed.
+"""
+
+from pathlib import Path
+
+header = """
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT) \\
+    template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \\
+        const cudaDeviceProp& dev_props,                                \\
+        bool manage_local_topk,                                         \\
+        int locality_hint,                                              \\
+        double preferred_shmem_carveout,                                \\
+        uint32_t pq_bits,                                               \\
+        uint32_t pq_dim,                                                \\
+        uint32_t precomp_data_count,                                    \\
+        uint32_t n_queries,                                             \\
+        uint32_t n_probes,                                              \\
+        uint32_t topk) -> raft::neighbors::ivf_pq::detail::selected<OutT, LutT>; \\
+\\
+    template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>( \\
+        raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,        \\
+        rmm::cuda_stream_view stream,                                   \\
+        uint32_t n_rows,                                                \\
+        uint32_t dim,                                                   \\
+        uint32_t n_probes,                                              \\
+        uint32_t pq_dim,                                                \\
+        uint32_t n_queries,                                             \\
+        raft::distance::DistanceType metric,                                  \\
+        raft::neighbors::ivf_pq::codebook_gen codebook_kind,            \\
+        uint32_t topk,                                                  \\
+        uint32_t max_samples,                                           \\
+        const float* cluster_centers,                                   \\
+        const float* pq_centers,                                        \\
+        const uint8_t* const* pq_dataset,                               \\
+        const uint32_t* cluster_labels,                                 \\
+        const uint32_t* _chunk_indices,                                 \\
+        const float* queries,                                           \\
+        const uint32_t* index_list,                                     \\
+        float* query_kths,                                              \\
+        LutT* lut_scores,                                               \\
+        OutT* _out_scores,                                              \\
+        uint32_t* _out_indices);
+
+
+#define COMMA ,
+"""
+
+trailer = """
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
+"""
+
+# Output-file suffix -> (OutT, LutT) template arguments. COMMA is a macro defined
+# in `header`; it lets a template-argument list that contains a comma travel
+# through the instantiation macro as a single argument.
+types = dict(
+    half_fp8_false=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>"),
+    half_fp8_true=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>"),
+    half_half=("half", "half"),
+    float_half=("float", "half"),
+    float_float=("float", "float"),
+    float_fp8_false=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>"),
+    float_fp8_true=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>"),
+)
+
+# Resolve the output directory from this script's location so the files land
+# beside it regardless of the current working directory.
+out_dir = Path(__file__).resolve().parent
+
+for path_key, (OutT, LutT) in types.items():
+    path = f"ivf_pq_compute_similarity_{path_key}.cu"
+    with open(out_dir / path, "w") as f:
+        f.write(header)
+        f.write(f"instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select({OutT}, {LutT});\n")
+        f.write(trailer)
+    # Report the new file's path relative to the cpp/ directory.
+    print(f"src/neighbors/detail/{path}")
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
new file mode 100644
index 0000000000..956b7010d5
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
@@ -0,0 +1,73 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
+  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                    \
+    bool manage_local_topk,                                                             \
+    int locality_hint,                                                                  \
+    double preferred_shmem_carveout,                                                    \
+    uint32_t pq_bits,                                                                   \
+    uint32_t pq_dim,                                                                    \
+    uint32_t precomp_data_count,                                                        \
+    uint32_t n_queries,                                                                 \
+    uint32_t n_probes,                                                                  \
+    uint32_t topk)                                                                      \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
+                                                                                        \
+  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
+    rmm::cuda_stream_view stream,                                                       \
+    uint32_t n_rows,                                                                    \
+    uint32_t dim,                                                                       \
+    uint32_t n_probes,                                                                  \
+    uint32_t pq_dim,                                                                    \
+    uint32_t n_queries,                                                                 \
+    raft::distance::DistanceType metric,                                                \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
+    uint32_t topk,                                                                      \
+    uint32_t max_samples,                                                               \
+    const float* cluster_centers,                                                       \
+    const float* pq_centers,                                                            \
+    const uint8_t* const* pq_dataset,                                                   \
+    const uint32_t* cluster_labels,                                                     \
+    const uint32_t* _chunk_indices,                                                     \
+    const float* queries,                                                               \
+    const uint32_t* index_list,                                                         \
+    float* query_kths,                                                                  \
+    LutT* lut_scores,                                                                   \
+    OutT* _out_scores,                                                                  \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, float);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
new file mode 100644
index 0000000000..fba72ad1dd
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
@@ -0,0 +1,74 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
+  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                    \
+    bool manage_local_topk,                                                             \
+    int locality_hint,                                                                  \
+    double preferred_shmem_carveout,                                                    \
+    uint32_t pq_bits,                                                                   \
+    uint32_t pq_dim,                                                                    \
+    uint32_t precomp_data_count,                                                        \
+    uint32_t n_queries,                                                                 \
+    uint32_t n_probes,                                                                  \
+    uint32_t topk)                                                                      \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
+                                                                                        \
+  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
+    rmm::cuda_stream_view stream,                                                       \
+    uint32_t n_rows,                                                                    \
+    uint32_t dim,                                                                       \
+    uint32_t n_probes,                                                                  \
+    uint32_t pq_dim,                                                                    \
+    uint32_t n_queries,                                                                 \
+    raft::distance::DistanceType metric,                                                \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
+    uint32_t topk,                                                                      \
+    uint32_t max_samples,                                                               \
+    const float* cluster_centers,                                                       \
+    const float* pq_centers,                                                            \
+    const uint8_t* const* pq_dataset,                                                   \
+    const uint32_t* cluster_labels,                                                     \
+    const uint32_t* _chunk_indices,                                                     \
+    const float* queries,                                                               \
+    const uint32_t* index_list,                                                         \
+    float* query_kths,                                                                  \
+    LutT* lut_scores,                                                                   \
+    OutT* _out_scores,                                                                  \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
new file mode 100644
index 0000000000..030f429315
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
@@ -0,0 +1,74 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
+  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                    \
+    bool manage_local_topk,                                                             \
+    int locality_hint,                                                                  \
+    double preferred_shmem_carveout,                                                    \
+    uint32_t pq_bits,                                                                   \
+    uint32_t pq_dim,                                                                    \
+    uint32_t precomp_data_count,                                                        \
+    uint32_t n_queries,                                                                 \
+    uint32_t n_probes,                                                                  \
+    uint32_t topk)                                                                      \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
+                                                                                        \
+  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
+    rmm::cuda_stream_view stream,                                                       \
+    uint32_t n_rows,                                                                    \
+    uint32_t dim,                                                                       \
+    uint32_t n_probes,                                                                  \
+    uint32_t pq_dim,                                                                    \
+    uint32_t n_queries,                                                                 \
+    raft::distance::DistanceType metric,                                                \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
+    uint32_t topk,                                                                      \
+    uint32_t max_samples,                                                               \
+    const float* cluster_centers,                                                       \
+    const float* pq_centers,                                                            \
+    const uint8_t* const* pq_dataset,                                                   \
+    const uint32_t* cluster_labels,                                                     \
+    const uint32_t* _chunk_indices,                                                     \
+    const float* queries,                                                               \
+    const uint32_t* index_list,                                                         \
+    float* query_kths,                                                                  \
+    LutT* lut_scores,                                                                   \
+    OutT* _out_scores,                                                                  \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
new file mode 100644
index 0000000000..31a4d7d503
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
@@ -0,0 +1,73 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
+  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                    \
+    bool manage_local_topk,                                                             \
+    int locality_hint,                                                                  \
+    double preferred_shmem_carveout,                                                    \
+    uint32_t pq_bits,                                                                   \
+    uint32_t pq_dim,                                                                    \
+    uint32_t precomp_data_count,                                                        \
+    uint32_t n_queries,                                                                 \
+    uint32_t n_probes,                                                                  \
+    uint32_t topk)                                                                      \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
+                                                                                        \
+  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
+    rmm::cuda_stream_view stream,                                                       \
+    uint32_t n_rows,                                                                    \
+    uint32_t dim,                                                                       \
+    uint32_t n_probes,                                                                  \
+    uint32_t pq_dim,                                                                    \
+    uint32_t n_queries,                                                                 \
+    raft::distance::DistanceType metric,                                                \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
+    uint32_t topk,                                                                      \
+    uint32_t max_samples,                                                               \
+    const float* cluster_centers,                                                       \
+    const float* pq_centers,                                                            \
+    const uint8_t* const* pq_dataset,                                                   \
+    const uint32_t* cluster_labels,                                                     \
+    const uint32_t* _chunk_indices,                                                     \
+    const float* queries,                                                               \
+    const uint32_t* index_list,                                                         \
+    float* query_kths,                                                                  \
+    LutT* lut_scores,                                                                   \
+    OutT* _out_scores,                                                                  \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, half);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
new file mode 100644
index 0000000000..c623c80446
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
@@ -0,0 +1,74 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
+  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                    \
+    bool manage_local_topk,                                                             \
+    int locality_hint,                                                                  \
+    double preferred_shmem_carveout,                                                    \
+    uint32_t pq_bits,                                                                   \
+    uint32_t pq_dim,                                                                    \
+    uint32_t precomp_data_count,                                                        \
+    uint32_t n_queries,                                                                 \
+    uint32_t n_probes,                                                                  \
+    uint32_t topk)                                                                      \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
+                                                                                        \
+  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
+    rmm::cuda_stream_view stream,                                                       \
+    uint32_t n_rows,                                                                    \
+    uint32_t dim,                                                                       \
+    uint32_t n_probes,                                                                  \
+    uint32_t pq_dim,                                                                    \
+    uint32_t n_queries,                                                                 \
+    raft::distance::DistanceType metric,                                                \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
+    uint32_t topk,                                                                      \
+    uint32_t max_samples,                                                               \
+    const float* cluster_centers,                                                       \
+    const float* pq_centers,                                                            \
+    const uint8_t* const* pq_dataset,                                                   \
+    const uint32_t* cluster_labels,                                                     \
+    const uint32_t* _chunk_indices,                                                     \
+    const float* queries,                                                               \
+    const uint32_t* index_list,                                                         \
+    float* query_kths,                                                                  \
+    LutT* lut_scores,                                                                   \
+    OutT* _out_scores,                                                                  \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
new file mode 100644
index 0000000000..f2aaca20db
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
@@ -0,0 +1,74 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
+  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                    \
+    bool manage_local_topk,                                                             \
+    int locality_hint,                                                                  \
+    double preferred_shmem_carveout,                                                    \
+    uint32_t pq_bits,                                                                   \
+    uint32_t pq_dim,                                                                    \
+    uint32_t precomp_data_count,                                                        \
+    uint32_t n_queries,                                                                 \
+    uint32_t n_probes,                                                                  \
+    uint32_t topk)                                                                      \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
+                                                                                        \
+  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
+    rmm::cuda_stream_view stream,                                                       \
+    uint32_t n_rows,                                                                    \
+    uint32_t dim,                                                                       \
+    uint32_t n_probes,                                                                  \
+    uint32_t pq_dim,                                                                    \
+    uint32_t n_queries,                                                                 \
+    raft::distance::DistanceType metric,                                                \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
+    uint32_t topk,                                                                      \
+    uint32_t max_samples,                                                               \
+    const float* cluster_centers,                                                       \
+    const float* pq_centers,                                                            \
+    const uint8_t* const* pq_dataset,                                                   \
+    const uint32_t* cluster_labels,                                                     \
+    const uint32_t* _chunk_indices,                                                     \
+    const float* queries,                                                               \
+    const uint32_t* index_list,                                                         \
+    float* query_kths,                                                                  \
+    LutT* lut_scores,                                                                   \
+    OutT* _out_scores,                                                                  \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
new file mode 100644
index 0000000000..4420b2534b
--- /dev/null
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
@@ -0,0 +1,73 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_pq_compute_similarity_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
+#include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
+  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
+    const cudaDeviceProp& dev_props,                                                    \
+    bool manage_local_topk,                                                             \
+    int locality_hint,                                                                  \
+    double preferred_shmem_carveout,                                                    \
+    uint32_t pq_bits,                                                                   \
+    uint32_t pq_dim,                                                                    \
+    uint32_t precomp_data_count,                                                        \
+    uint32_t n_queries,                                                                 \
+    uint32_t n_probes,                                                                  \
+    uint32_t topk)                                                                      \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
+                                                                                        \
+  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
+    rmm::cuda_stream_view stream,                                                       \
+    uint32_t n_rows,                                                                    \
+    uint32_t dim,                                                                       \
+    uint32_t n_probes,                                                                  \
+    uint32_t pq_dim,                                                                    \
+    uint32_t n_queries,                                                                 \
+    raft::distance::DistanceType metric,                                                \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
+    uint32_t topk,                                                                      \
+    uint32_t max_samples,                                                               \
+    const float* cluster_centers,                                                       \
+    const float* pq_centers,                                                            \
+    const uint8_t* const* pq_dataset,                                                   \
+    const uint32_t* cluster_labels,                                                     \
+    const uint32_t* _chunk_indices,                                                     \
+    const float* queries,                                                               \
+    const uint32_t* index_list,                                                         \
+    float* query_kths,                                                                  \
+    LutT* lut_scores,                                                                   \
+    OutT* _out_scores,                                                                  \
+    uint32_t* _out_indices);
+
+#define COMMA ,
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(half, half);
+
+#undef COMMA
+
+#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select
diff --git a/cpp/src/neighbors/detail/selection_faiss_00_generate.py b/cpp/src/neighbors/detail/selection_faiss_00_generate.py
new file mode 100644
index 0000000000..36ba56c9b3
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_00_generate.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+header = """
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \\
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \\
+                                                  const payload_t* inV, \\
+                                                  size_t n_rows,        \\
+                                                  size_t n_cols,        \\
+                                                  key_t* outK,          \\
+                                                  payload_t* outV,      \\
+                                                  bool select_min,      \\
+                                                  int k,                \\
+                                                  cudaStream_t stream)
+
+"""
+
+types = dict(
+    uint32_t_float=("uint32_t", "float"),
+    int32_t_float=("int32_t", "float"),
+    long_float=("long", "float"),
+    size_t_double=("size_t", "double"),
+    int_double=("int", "double"),
+    size_t_float=("size_t", "float"),
+)
+
+# Write one explicit-instantiation .cu file per entry in `types`.
+for type_path, (payload_t, key_t) in types.items():
+    path = f"selection_faiss_{type_path}.cu"
+    with open(path, "w") as f:
+        f.write(header)
+        f.write(f"instantiate_raft_neighbors_detail_select_k({payload_t}, {key_t});\n\n")
+        f.write("#undef instantiate_raft_neighbors_detail_select_k\n")
+    # for pasting into CMakeLists.txt
+    print(f"src/neighbors/detail/{path}")
diff --git a/cpp/src/neighbors/detail/selection_faiss_int32_t_float.cu b/cpp/src/neighbors/detail/selection_faiss_int32_t_float.cu
new file mode 100644
index 0000000000..1f1ece05ae
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_int32_t_float.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(int32_t, float);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_int_double.cu b/cpp/src/neighbors/detail/selection_faiss_int_double.cu
new file mode 100644
index 0000000000..7e832410c4
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_int_double.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(int, double);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_long_float.cu b/cpp/src/neighbors/detail/selection_faiss_long_float.cu
new file mode 100644
index 0000000000..441d54fa30
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_long_float.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(long, float);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_size_t_double.cu b/cpp/src/neighbors/detail/selection_faiss_size_t_double.cu
new file mode 100644
index 0000000000..ca310e7697
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_size_t_double.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(size_t, double);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_size_t_float.cu b/cpp/src/neighbors/detail/selection_faiss_size_t_float.cu
new file mode 100644
index 0000000000..a830e6ecac
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_size_t_float.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(size_t, float);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_uint32_t_float.cu b/cpp/src/neighbors/detail/selection_faiss_uint32_t_float.cu
new file mode 100644
index 0000000000..2fecaa5cf1
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_uint32_t_float.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(uint32_t, float);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/ivf_flat_00_generate.py b/cpp/src/neighbors/ivf_flat_00_generate.py
new file mode 100644
index 0000000000..44ea9709c2
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_00_generate.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+header = """/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+"""
+
+types = dict(
+    float_int64_t= ("float", "int64_t"),
+    int8_t_int64_t=("int8_t", "int64_t"),
+    uint8_t_int64_t=("uint8_t", "int64_t"),
+)
+
+build_macro = """
+#define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)        \\
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>( \\
+    raft::device_resources const& handle,                         \\
+    const raft::neighbors::ivf_flat::index_params& params,        \\
+    const T* dataset,                                             \\
+    IdxT n_rows,                                                  \\
+    uint32_t dim)                                                 \\
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \\
+                                                                  \\
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>( \\
+    raft::device_resources const& handle,                         \\
+    const raft::neighbors::ivf_flat::index_params& params,        \\
+    raft::device_matrix_view<const T, IdxT, row_major> dataset)   \\
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \\
+                                                                  \\
+  template void raft::neighbors::ivf_flat::build<T, IdxT>( \\
+    raft::device_resources const& handle,                         \\
+    const raft::neighbors::ivf_flat::index_params& params,        \\
+    raft::device_matrix_view<const T, IdxT, row_major> dataset,   \\
+    raft::neighbors::ivf_flat::index<T, IdxT>& idx);
+"""
+
+extend_macro = """
+#define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \\
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
+    raft::device_resources const& handle,                                  \\
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \\
+    const T* new_vectors,                                                  \\
+    const IdxT* new_indices,                                               \\
+    IdxT n_rows)                                                           \\
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \\
+                                                                           \\
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
+    raft::device_resources const& handle,                                  \\
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \\
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \\
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \\
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \\
+                                                                           \\
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
+    raft::device_resources const& handle,                                  \\
+    raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \\
+    const T* new_vectors,                                                  \\
+    const IdxT* new_indices,                                               \\
+    IdxT n_rows);                                                          \\
+                                                                           \\
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
+    raft::device_resources const& handle,                                  \\
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \\
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \\
+    raft::neighbors::ivf_flat::index<T, IdxT>* index);
+"""
+
+search_macro = """
+#define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)        \\
+  template void raft::neighbors::ivf_flat::search<T, IdxT>( \\
+    raft::device_resources const& handle,                          \\
+    const raft::neighbors::ivf_flat::search_params& params,        \\
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \\
+    const T* queries,                                              \\
+    uint32_t n_queries,                                            \\
+    uint32_t k,                                                    \\
+    IdxT* neighbors,                                               \\
+    float* distances,                                              \\
+    rmm::mr::device_memory_resource* mr );                         \\
+                                                                   \\
+  template void raft::neighbors::ivf_flat::search<T, IdxT>( \\
+    raft::device_resources const& handle,                          \\
+    const raft::neighbors::ivf_flat::search_params& params,        \\
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \\
+    raft::device_matrix_view<const T, IdxT, row_major> queries,    \\
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,     \\
+    raft::device_matrix_view<float, IdxT, row_major> distances);
+"""
+
+macros = dict(
+    build=dict(
+        definition=build_macro,
+        name="instantiate_raft_neighbors_ivf_flat_build"),
+    extend=dict(
+        definition=extend_macro,
+        name="instantiate_raft_neighbors_ivf_flat_extend"),
+    search=dict(
+        definition=search_macro,
+        name="instantiate_raft_neighbors_ivf_flat_search"),
+)
+
+# Write one .cu file per (macro, type pair) combination: the shared header,
+# the macro definition, a single invocation for (T, IdxT), and the #undef.
+for type_path, (T, IdxT) in types.items():
+    for macro_path, macro in macros.items():
+        path = f"ivf_flat_{macro_path}_{type_path}.cu"
+        with open(path, "w") as f:
+            f.write(header)
+            f.write(macro['definition'])
+            f.write(f"{macro['name']}({T}, {IdxT});\n\n")
+            f.write(f"#undef {macro['name']}\n")
+
+        print(f"src/neighbors/{path}")
diff --git a/cpp/src/neighbors/ivf_flat_build_float_int64_t.cu b/cpp/src/neighbors/ivf_flat_build_float_int64_t.cu
new file mode 100644
index 0000000000..622f7c7d90
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_build_float_int64_t.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the ivf_flat::build overloads listed below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)      \
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    const T* dataset,                                           \
+    IdxT n_rows,                                                \
+    uint32_t dim)                                               \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
+                                                                \
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset) \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
+                                                                \
+  template void raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset, \
+    raft::neighbors::ivf_flat::index<T, IdxT>& idx);
+instantiate_raft_neighbors_ivf_flat_build(float, int64_t);
+// Instantiated above for T = float, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_build
diff --git a/cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu
new file mode 100644
index 0000000000..7b1eeae32d
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the ivf_flat::build overloads listed below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)      \
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    const T* dataset,                                           \
+    IdxT n_rows,                                                \
+    uint32_t dim)                                               \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
+                                                                \
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset) \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
+                                                                \
+  template void raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset, \
+    raft::neighbors::ivf_flat::index<T, IdxT>& idx);
+instantiate_raft_neighbors_ivf_flat_build(int8_t, int64_t);
+// Instantiated above for T = int8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_build
diff --git a/cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..40cf28151f
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the ivf_flat::build overloads listed below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)      \
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    const T* dataset,                                           \
+    IdxT n_rows,                                                \
+    uint32_t dim)                                               \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
+                                                                \
+  template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset) \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
+                                                                \
+  template void raft::neighbors::ivf_flat::build<T, IdxT>(      \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::index_params& params,      \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset, \
+    raft::neighbors::ivf_flat::index<T, IdxT>& idx);
+instantiate_raft_neighbors_ivf_flat_build(uint8_t, int64_t);
+// Instantiated above for T = uint8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_build
diff --git a/cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu b/cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu
new file mode 100644
index 0000000000..f7d99d7081
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the four ivf_flat::extend overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows)                                                           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows);                                                          \
+                                                                           \
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index);
+instantiate_raft_neighbors_ivf_flat_extend(float, int64_t);
+// Instantiated above for T = float, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_extend
diff --git a/cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
new file mode 100644
index 0000000000..9eec4f9648
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the four ivf_flat::extend overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows)                                                           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows);                                                          \
+                                                                           \
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index);
+instantiate_raft_neighbors_ivf_flat_extend(int8_t, int64_t);
+// Instantiated above for T = int8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_extend
diff --git a/cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..fc24cbff74
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the four ivf_flat::extend overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows)                                                           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
+    ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
+                                                                           \
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
+    const T* new_vectors,                                                  \
+    const IdxT* new_indices,                                               \
+    IdxT n_rows);                                                          \
+                                                                           \
+  template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
+    raft::device_resources const& handle,                                  \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
+    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
+    raft::neighbors::ivf_flat::index<T, IdxT>* index);
+instantiate_raft_neighbors_ivf_flat_extend(uint8_t, int64_t);
+// Instantiated above for T = uint8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_extend
diff --git a/cpp/src/neighbors/ivf_flat_search_float_int64_t.cu b/cpp/src/neighbors/ivf_flat_search_float_int64_t.cu
new file mode 100644
index 0000000000..5a1fae6d5a
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_search_float_int64_t.cu
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the two ivf_flat::search overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)     \
+  template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::search_params& params,     \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
+    const T* queries,                                           \
+    uint32_t n_queries,                                         \
+    uint32_t k,                                                 \
+    IdxT* neighbors,                                            \
+    float* distances,                                           \
+    rmm::mr::device_memory_resource* mr);                       \
+                                                                \
+  template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::search_params& params,     \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
+    raft::device_matrix_view<const T, IdxT, row_major> queries, \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,  \
+    raft::device_matrix_view<float, IdxT, row_major> distances);
+instantiate_raft_neighbors_ivf_flat_search(float, int64_t);
+// Instantiated above for T = float, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_search
diff --git a/cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu
new file mode 100644
index 0000000000..bc84159a41
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the two ivf_flat::search overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)     \
+  template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::search_params& params,     \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
+    const T* queries,                                           \
+    uint32_t n_queries,                                         \
+    uint32_t k,                                                 \
+    IdxT* neighbors,                                            \
+    float* distances,                                           \
+    rmm::mr::device_memory_resource* mr);                       \
+                                                                \
+  template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::search_params& params,     \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
+    raft::device_matrix_view<const T, IdxT, row_major> queries, \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,  \
+    raft::device_matrix_view<float, IdxT, row_major> distances);
+instantiate_raft_neighbors_ivf_flat_search(int8_t, int64_t);
+// Instantiated above for T = int8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_search
diff --git a/cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..9e70e21af4
--- /dev/null
+++ b/cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by ivf_flat_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python ivf_flat_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat-inl.cuh>
+// Explicit instantiations of the two ivf_flat::search overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)     \
+  template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::search_params& params,     \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
+    const T* queries,                                           \
+    uint32_t n_queries,                                         \
+    uint32_t k,                                                 \
+    IdxT* neighbors,                                            \
+    float* distances,                                           \
+    rmm::mr::device_memory_resource* mr);                       \
+                                                                \
+  template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
+    raft::device_resources const& handle,                       \
+    const raft::neighbors::ivf_flat::search_params& params,     \
+    const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
+    raft::device_matrix_view<const T, IdxT, row_major> queries, \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,  \
+    raft::device_matrix_view<float, IdxT, row_major> distances);
+instantiate_raft_neighbors_ivf_flat_search(uint8_t, int64_t);
+// Instantiated above for T = uint8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_flat_search
diff --git a/cpp/src/neighbors/ivfpq_build_float_int64_t.cu b/cpp/src/neighbors/ivfpq_build_float_int64_t.cu
new file mode 100644
index 0000000000..6771964cae
--- /dev/null
+++ b/cpp/src/neighbors/ivfpq_build_float_int64_t.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
+// Explicit instantiations of the two ivf_pq::build overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                 \
+  template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
+    raft::device_resources const& handle,                                                \
+    const raft::neighbors::ivf_pq::index_params& params,                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset);                         \
+                                                                                         \
+  template auto raft::neighbors::ivf_pq::build(                                          \
+    raft::device_resources const& handle,                                                \
+    const raft::neighbors::ivf_pq::index_params& params,                                 \
+    const T* dataset,                                                                    \
+    IdxT n_rows,                                                                         \
+    uint32_t dim)                                                                        \
+    ->raft::neighbors::ivf_pq::index<IdxT>;
+// NOTE(review): the second instantiation names no <T, IdxT>; presumably deduced -- confirm.
+instantiate_raft_neighbors_ivf_pq_build(float, int64_t);
+// Instantiated above for T = float, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_pq_build
diff --git a/cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu
new file mode 100644
index 0000000000..759045faa7
--- /dev/null
+++ b/cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
+// Explicit instantiations of the two ivf_pq::build overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                 \
+  template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
+    raft::device_resources const& handle,                                                \
+    const raft::neighbors::ivf_pq::index_params& params,                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset);                         \
+                                                                                         \
+  template auto raft::neighbors::ivf_pq::build(                                          \
+    raft::device_resources const& handle,                                                \
+    const raft::neighbors::ivf_pq::index_params& params,                                 \
+    const T* dataset,                                                                    \
+    IdxT n_rows,                                                                         \
+    uint32_t dim)                                                                        \
+    ->raft::neighbors::ivf_pq::index<IdxT>;
+// NOTE(review): the second instantiation names no <T, IdxT>; presumably deduced -- confirm.
+instantiate_raft_neighbors_ivf_pq_build(int8_t, int64_t);
+// Instantiated above for T = int8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_pq_build
diff --git a/cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..62a47e9bcf
--- /dev/null
+++ b/cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
+// Explicit instantiations of the two ivf_pq::build overloads below for one (T, IdxT) pair.
+#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                 \
+  template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
+    raft::device_resources const& handle,                                                \
+    const raft::neighbors::ivf_pq::index_params& params,                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> dataset);                         \
+                                                                                         \
+  template auto raft::neighbors::ivf_pq::build(                                          \
+    raft::device_resources const& handle,                                                \
+    const raft::neighbors::ivf_pq::index_params& params,                                 \
+    const T* dataset,                                                                    \
+    IdxT n_rows,                                                                         \
+    uint32_t dim)                                                                        \
+    ->raft::neighbors::ivf_pq::index<IdxT>;
+// NOTE(review): the second instantiation names no <T, IdxT>; presumably deduced -- confirm.
+instantiate_raft_neighbors_ivf_pq_build(uint8_t, int64_t);
+// Instantiated above for T = uint8_t, IdxT = int64_t; the helper macro is no longer needed.
+#undef instantiate_raft_neighbors_ivf_pq_build
diff --git a/cpp/src/neighbors/ivfpq_extend_float_int64_t.cu b/cpp/src/neighbors/ivfpq_extend_float_int64_t.cu
new file mode 100644
index 0000000000..3e728be38d
--- /dev/null
+++ b/cpp/src/neighbors/ivfpq_extend_float_int64_t.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
+// Explicitly instantiates four ivf_pq::extend overloads (index-returning and void) for (T, IdxT).
+#define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                 \
+  template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
+    raft::device_resources const& handle,                                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx);                                     \
+                                                                                          \
+  template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
+    raft::neighbors::ivf_pq::index<IdxT>* idx);                                           \
+                                                                                          \
+  template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                                      \
+    const T* new_vectors,                                                                 \
+    const IdxT* new_indices,                                                              \
+    IdxT n_rows)                                                                          \
+    ->raft::neighbors::ivf_pq::index<IdxT>;                                               \
+                                                                                          \
+  template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    raft::neighbors::ivf_pq::index<IdxT>* idx,                                            \
+    const T* new_vectors,                                                                 \
+    const IdxT* new_indices,                                                              \
+    IdxT n_rows);
+
+instantiate_raft_neighbors_ivf_pq_extend(float, int64_t);  // T = float, IdxT = int64_t
+
+#undef instantiate_raft_neighbors_ivf_pq_extend
diff --git a/cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu
new file mode 100644
index 0000000000..7853e53f63
--- /dev/null
+++ b/cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
+// Explicitly instantiates four ivf_pq::extend overloads (index-returning and void) for (T, IdxT).
+#define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                 \
+  template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
+    raft::device_resources const& handle,                                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx);                                     \
+                                                                                          \
+  template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
+    raft::neighbors::ivf_pq::index<IdxT>* idx);                                           \
+                                                                                          \
+  template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                                      \
+    const T* new_vectors,                                                                 \
+    const IdxT* new_indices,                                                              \
+    IdxT n_rows)                                                                          \
+    ->raft::neighbors::ivf_pq::index<IdxT>;                                               \
+                                                                                          \
+  template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    raft::neighbors::ivf_pq::index<IdxT>* idx,                                            \
+    const T* new_vectors,                                                                 \
+    const IdxT* new_indices,                                                              \
+    IdxT n_rows);
+
+instantiate_raft_neighbors_ivf_pq_extend(int8_t, int64_t);  // T = int8_t, IdxT = int64_t
+
+#undef instantiate_raft_neighbors_ivf_pq_extend
diff --git a/cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..599a88fc67
--- /dev/null
+++ b/cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
+// Explicitly instantiates four ivf_pq::extend overloads (index-returning and void) for (T, IdxT).
+#define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                 \
+  template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
+    raft::device_resources const& handle,                                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx);                                     \
+                                                                                          \
+  template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
+    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
+    raft::neighbors::ivf_pq::index<IdxT>* idx);                                           \
+                                                                                          \
+  template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                                      \
+    const T* new_vectors,                                                                 \
+    const IdxT* new_indices,                                                              \
+    IdxT n_rows)                                                                          \
+    ->raft::neighbors::ivf_pq::index<IdxT>;                                               \
+                                                                                          \
+  template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
+    raft::device_resources const& handle,                                                 \
+    raft::neighbors::ivf_pq::index<IdxT>* idx,                                            \
+    const T* new_vectors,                                                                 \
+    const IdxT* new_indices,                                                              \
+    IdxT n_rows);
+
+instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t);  // T = uint8_t, IdxT = int64_t
+
+#undef instantiate_raft_neighbors_ivf_pq_extend
diff --git a/cpp/src/neighbors/ivfpq_search_float_int64_t.cu b/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
index 91093d3a39..ab946d2b65 100644
--- a/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
@@ -14,26 +14,29 @@
  * limitations under the License.
  */
 
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
 
-#include <raft_runtime/neighbors/ivf_pq.hpp>
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
+    raft::device_matrix_view<float, IdxT, row_major> distances); \
+                                                                 \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    const T* queries,                                            \
+    uint32_t n_queries,                                          \
+    uint32_t k,                                                  \
+    IdxT* neighbors,                                             \
+    float* distances,                                            \
+    rmm::mr::device_memory_resource* mr)
 
-namespace raft::runtime::neighbors::ivf_pq {
+instantiate_raft_neighbors_ivf_pq_search(float, int64_t);  // T = float, IdxT = int64_t
 
-#define RAFT_SEARCH_INST(T, IdxT)                                                                 \
-  void search(raft::device_resources const& handle,                                               \
-              const raft::neighbors::ivf_pq::search_params& params,                               \
-              const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
-              raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
-              raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,                          \
-              raft::device_matrix_view<float, IdxT, row_major> distances)                         \
-  {                                                                                               \
-    raft::neighbors::ivf_pq::search<T, IdxT>(handle, params, idx, queries, neighbors, distances); \
-  }
-
-RAFT_SEARCH_INST(float, int64_t);
-
-#undef RAFT_INST_SEARCH
-
-}  // namespace raft::runtime::neighbors::ivf_pq
+#undef instantiate_raft_neighbors_ivf_pq_search
diff --git a/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
index e1552c0b27..af54a9312a 100644
--- a/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
@@ -14,26 +14,29 @@
  * limitations under the License.
  */
 
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
 
-#include <raft_runtime/neighbors/ivf_pq.hpp>
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
+    raft::device_matrix_view<float, IdxT, row_major> distances); \
+                                                                 \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    const T* queries,                                            \
+    uint32_t n_queries,                                          \
+    uint32_t k,                                                  \
+    IdxT* neighbors,                                             \
+    float* distances,                                            \
+    rmm::mr::device_memory_resource* mr)
 
-namespace raft::runtime::neighbors::ivf_pq {
+instantiate_raft_neighbors_ivf_pq_search(int8_t, int64_t);  // T = int8_t, IdxT = int64_t
 
-#define RAFT_SEARCH_INST(T, IdxT)                                                                 \
-  void search(raft::device_resources const& handle,                                               \
-              const raft::neighbors::ivf_pq::search_params& params,                               \
-              const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
-              raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
-              raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,                          \
-              raft::device_matrix_view<float, IdxT, row_major> distances)                         \
-  {                                                                                               \
-    raft::neighbors::ivf_pq::search<T, IdxT>(handle, params, idx, queries, neighbors, distances); \
-  }
-
-RAFT_SEARCH_INST(int8_t, int64_t);
-
-#undef RAFT_INST_SEARCH
-
-}  // namespace raft::runtime::neighbors::ivf_pq
+#undef instantiate_raft_neighbors_ivf_pq_search
diff --git a/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
index 85195a7551..7b49487506 100644
--- a/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
@@ -14,26 +14,29 @@
  * limitations under the License.
  */
 
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
+#include <raft/neighbors/ivf_pq-inl.cuh>
+#include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
 
-#include <raft_runtime/neighbors/ivf_pq.hpp>
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
+    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
+    raft::device_matrix_view<float, IdxT, row_major> distances); \
+                                                                 \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
+    raft::device_resources const& handle,                        \
+    const raft::neighbors::ivf_pq::search_params& params,        \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
+    const T* queries,                                            \
+    uint32_t n_queries,                                          \
+    uint32_t k,                                                  \
+    IdxT* neighbors,                                             \
+    float* distances,                                            \
+    rmm::mr::device_memory_resource* mr)
 
-namespace raft::runtime::neighbors::ivf_pq {
+instantiate_raft_neighbors_ivf_pq_search(uint8_t, int64_t);  // T = uint8_t, IdxT = int64_t
 
-#define RAFT_SEARCH_INST(T, IdxT)                                                                 \
-  void search(raft::device_resources const& handle,                                               \
-              const raft::neighbors::ivf_pq::search_params& params,                               \
-              const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
-              raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
-              raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,                          \
-              raft::device_matrix_view<float, IdxT, row_major> distances)                         \
-  {                                                                                               \
-    raft::neighbors::ivf_pq::search<T, IdxT>(handle, params, idx, queries, neighbors, distances); \
-  }
-
-RAFT_SEARCH_INST(uint8_t, int64_t);
-
-#undef RAFT_INST_SEARCH
-
-}  // namespace raft::runtime::neighbors::ivf_pq
+#undef instantiate_raft_neighbors_ivf_pq_search
diff --git a/cpp/src/neighbors/refine_00_generate.py b/cpp/src/neighbors/refine_00_generate.py
new file mode 100644
index 0000000000..18c8857e3f
--- /dev/null
+++ b/cpp/src/neighbors/refine_00_generate.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+header = """
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by refine_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python refine_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/refine-inl.cuh>
+
+#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)       \\
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(        \\
+    raft::device_resources const& handle,                                              \\
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,             \\
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,             \\
+    raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,  \\
+    raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                    \\
+    raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,             \\
+    raft::distance::DistanceType metric);                                              \\
+                                                                                       \\
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(        \\
+    raft::device_resources const& handle,                                              \\
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,               \\
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,               \\
+    raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,    \\
+    raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,                      \\
+    raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,               \\
+    raft::distance::DistanceType metric);
+
+"""
+# File-name suffix -> (data_t, distance_t) substituted into the instantiation line below.
+types = dict(
+    float_float=("float", "float"),
+    int8_t_float=("int8_t", "float"),
+    uint8_t_float=("uint8_t", "float"),
+)
+# Write one refine_<suffix>.cu per entry into the current working directory.
+for type_path, (data_t, distance_t) in types.items():
+    path = f"refine_{type_path}.cu"
+    with open(path, "w") as f:
+        f.write(header)
+        f.write(f"instantiate_raft_neighbors_refine(int64_t, {data_t}, {distance_t}, int64_t);\n\n")
+        f.write("#undef instantiate_raft_neighbors_refine\n")
+
+    # for pasting into CMakeLists.txt
+    print(f"src/neighbors/{path}")
diff --git a/cpp/src/neighbors/refine_float_float.cu b/cpp/src/neighbors/refine_float_float.cu
new file mode 100644
index 0000000000..7e811fd7e3
--- /dev/null
+++ b/cpp/src/neighbors/refine_float_float.cu
@@ -0,0 +1,50 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by refine_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python refine_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/refine-inl.cuh>
+
+#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)      \
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
+    raft::device_resources const& handle,                                             \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,            \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,            \
+    raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, \
+    raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                   \
+    raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,            \
+    raft::distance::DistanceType metric);                                             \
+                                                                                      \
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
+    raft::device_resources const& handle,                                             \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,              \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,              \
+    raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,   \
+    raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,                     \
+    raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,              \
+    raft::distance::DistanceType metric);
+
+instantiate_raft_neighbors_refine(int64_t, float, float, int64_t);
+
+#undef instantiate_raft_neighbors_refine
diff --git a/cpp/src/neighbors/refine_int8_t_float.cu b/cpp/src/neighbors/refine_int8_t_float.cu
new file mode 100644
index 0000000000..6983c2492c
--- /dev/null
+++ b/cpp/src/neighbors/refine_int8_t_float.cu
@@ -0,0 +1,50 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by refine_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python refine_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/refine-inl.cuh>
+
+#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)      \
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
+    raft::device_resources const& handle,                                             \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,            \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,            \
+    raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, \
+    raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                   \
+    raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,            \
+    raft::distance::DistanceType metric);                                             \
+                                                                                      \
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
+    raft::device_resources const& handle,                                             \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,              \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,              \
+    raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,   \
+    raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,                     \
+    raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,              \
+    raft::distance::DistanceType metric);
+
+instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t);
+
+#undef instantiate_raft_neighbors_refine
diff --git a/cpp/src/neighbors/refine_uint8_t_float.cu b/cpp/src/neighbors/refine_uint8_t_float.cu
new file mode 100644
index 0000000000..f61bc508c0
--- /dev/null
+++ b/cpp/src/neighbors/refine_uint8_t_float.cu
@@ -0,0 +1,50 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by refine_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python refine_00_generate.py
+ *
+ */
+
+#include <raft/neighbors/refine-inl.cuh>
+
+#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)      \
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
+    raft::device_resources const& handle,                                             \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,            \
+    raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,            \
+    raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, \
+    raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                   \
+    raft::device_matrix_view<distance_t, matrix_idx, row_major> distances,            \
+    raft::distance::DistanceType metric);                                             \
+                                                                                      \
+  template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
+    raft::device_resources const& handle,                                             \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,              \
+    raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,              \
+    raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,   \
+    raft::host_matrix_view<idx_t, matrix_idx, row_major> indices,                     \
+    raft::host_matrix_view<distance_t, matrix_idx, row_major> distances,              \
+    raft::distance::DistanceType metric);
+
+instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t);
+
+#undef instantiate_raft_neighbors_refine
diff --git a/cpp/src/neighbors/specializations/ball_cover_all_knn_query.cu b/cpp/src/neighbors/specializations/ball_cover_all_knn_query.cu
deleted file mode 100644
index 305dd6796e..0000000000
--- a/cpp/src/neighbors/specializations/ball_cover_all_knn_query.cu
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ball_cover.cuh>
-#include <raft/neighbors/ball_cover_types.hpp>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cstdint>
-
-namespace raft::neighbors::ball_cover {
-template void all_knn_query<std::int64_t, float, std::uint32_t>(
-  raft::device_resources const& handle,
-  BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>& index,
-  std::uint32_t k,
-  std::int64_t* inds,
-  float* dists,
-  bool perform_post_filtering,
-  float weight);
-
-};  // namespace raft::neighbors::ball_cover
diff --git a/cpp/src/neighbors/specializations/ball_cover_build_index.cu b/cpp/src/neighbors/specializations/ball_cover_build_index.cu
deleted file mode 100644
index ec7f4bcf52..0000000000
--- a/cpp/src/neighbors/specializations/ball_cover_build_index.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ball_cover.cuh>
-#include <raft/neighbors/ball_cover_types.hpp>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cstdint>
-
-namespace raft::neighbors::ball_cover {
-template class BallCoverIndex<int, float, std::uint32_t, std::uint32_t>;
-template class BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>;
-
-template void build_index<std::int64_t, float, std::uint32_t, std::uint32_t>(
-  raft::device_resources const& handle,
-  BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>& index);
-
-};  // namespace raft::neighbors::ball_cover
diff --git a/cpp/src/neighbors/specializations/ball_cover_knn_query.cu b/cpp/src/neighbors/specializations/ball_cover_knn_query.cu
deleted file mode 100644
index 634427200e..0000000000
--- a/cpp/src/neighbors/specializations/ball_cover_knn_query.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/ball_cover.cuh>
-#include <raft/neighbors/ball_cover_types.hpp>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ball_cover {
-template void knn_query<std::int64_t, float, std::uint32_t>(
-  raft::device_resources const& handle,
-  const BallCoverIndex<std::int64_t, float, std::uint32_t, std::uint32_t>& index,
-  std::uint32_t k,
-  const float* query,
-  std::uint32_t n_query_pts,
-  std::int64_t* inds,
-  float* dists,
-  bool perform_post_filtering,
-  float weight);
-
-};  // namespace raft::neighbors::ball_cover
diff --git a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu b/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
deleted file mode 100644
index b69751a62a..0000000000
--- a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/ball_cover/registers.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-template void rbc_low_dim_pass_one<std::int64_t, float, std::uint32_t>(
-  raft::device_resources const& handle,
-  const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
-  const float* query,
-  const std::uint32_t n_query_rows,
-  std::uint32_t k,
-  const std::int64_t* R_knn_inds,
-  const float* R_knn_dists,
-  DistFunc<float, std::uint32_t>& dfunc,
-  std::int64_t* inds,
-  float* dists,
-  float weight,
-  std::uint32_t* dists_counter);
-
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
\ No newline at end of file
diff --git a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu b/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
deleted file mode 100644
index ca44ad3165..0000000000
--- a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/ball_cover/registers.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-template void rbc_low_dim_pass_one<std::int64_t, float, std::uint32_t, 3>(
-  raft::device_resources const& handle,
-  const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
-  const float* query,
-  const std::uint32_t n_query_rows,
-  std::uint32_t k,
-  const std::int64_t* R_knn_inds,
-  const float* R_knn_dists,
-  DistFunc<float, std::uint32_t>& dfunc,
-  std::int64_t* inds,
-  float* dists,
-  float weight,
-  std::uint32_t* dists_counter);
-
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
\ No newline at end of file
diff --git a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu b/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
deleted file mode 100644
index ba44327653..0000000000
--- a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/ball_cover/registers.cuh>
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-template void rbc_low_dim_pass_two<std::int64_t, float, std::uint32_t, 2>(
-  raft::device_resources const& handle,
-  const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
-  const float* query,
-  const std::uint32_t n_query_rows,
-  std::uint32_t k,
-  const std::int64_t* R_knn_inds,
-  const float* R_knn_dists,
-  DistFunc<float, std::uint32_t>& dfunc,
-  std::int64_t* inds,
-  float* dists,
-  float weight,
-  std::uint32_t* post_dists_counter);
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
\ No newline at end of file
diff --git a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu b/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
deleted file mode 100644
index 59132c1f99..0000000000
--- a/cpp/src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/ball_cover/registers.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-template void rbc_low_dim_pass_two<std::int64_t, float, std::uint32_t, 3>(
-  raft::device_resources const& handle,
-  const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
-  const float* query,
-  const std::uint32_t n_query_rows,
-  std::uint32_t k,
-  const std::int64_t* R_knn_inds,
-  const float* R_knn_dists,
-  DistFunc<float, std::uint32_t>& dfunc,
-  std::int64_t* inds,
-  float* dists,
-  float weight,
-  std::uint32_t* post_dists_counter);
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
\ No newline at end of file
diff --git a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_int.cu b/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_int.cu
deleted file mode 100644
index 04aa42c9f1..0000000000
--- a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_int.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/brute_force.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::detail {
-#define RAFT_INST(IdxT, T, IntT)                                                          \
-  template void brute_force_knn_impl<IntT, IdxT, T>(raft::device_resources const& handle, \
-                                                    std::vector<T*>& input,               \
-                                                    std::vector<IntT>& sizes,             \
-                                                    IntT D,                               \
-                                                    T* search_items,                      \
-                                                    IntT n,                               \
-                                                    IdxT* res_I,                          \
-                                                    T* res_D,                             \
-                                                    IntT k,                               \
-                                                    bool rowMajorIndex,                   \
-                                                    bool rowMajorQuery,                   \
-                                                    std::vector<IdxT>* translations,      \
-                                                    raft::distance::DistanceType metric,  \
-                                                    float metricArg,                      \
-                                                    raft::identity_op);
-RAFT_INST(long, float, int);
-#undef RAFT_INST
-}  // namespace raft::neighbors::detail
diff --git a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_uint.cu b/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_uint.cu
deleted file mode 100644
index a8b9d4299a..0000000000
--- a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_long_float_uint.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/brute_force.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::detail {
-#define RAFT_INST(IdxT, T, IntT)                                                          \
-  template void brute_force_knn_impl<IntT, IdxT, T>(raft::device_resources const& handle, \
-                                                    std::vector<T*>& input,               \
-                                                    std::vector<IntT>& sizes,             \
-                                                    IntT D,                               \
-                                                    T* search_items,                      \
-                                                    IntT n,                               \
-                                                    IdxT* res_I,                          \
-                                                    T* res_D,                             \
-                                                    IntT k,                               \
-                                                    bool rowMajorIndex,                   \
-                                                    bool rowMajorQuery,                   \
-                                                    std::vector<IdxT>* translations,      \
-                                                    raft::distance::DistanceType metric,  \
-                                                    float metricArg,                      \
-                                                    raft::identity_op);
-RAFT_INST(long, float, unsigned int);
-#undef RAFT_INST
-}  // namespace raft::neighbors::detail
diff --git a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_int.cu b/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_int.cu
deleted file mode 100644
index c97e6e936a..0000000000
--- a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_int.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/brute_force.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::detail {
-#define RAFT_INST(IdxT, T, IntT)                                                          \
-  template void brute_force_knn_impl<IntT, IdxT, T>(raft::device_resources const& handle, \
-                                                    std::vector<T*>& input,               \
-                                                    std::vector<IntT>& sizes,             \
-                                                    IntT D,                               \
-                                                    T* search_items,                      \
-                                                    IntT n,                               \
-                                                    IdxT* res_I,                          \
-                                                    T* res_D,                             \
-                                                    IntT k,                               \
-                                                    bool rowMajorIndex,                   \
-                                                    bool rowMajorQuery,                   \
-                                                    std::vector<IdxT>* translations,      \
-                                                    raft::distance::DistanceType metric,  \
-                                                    float metricArg,                      \
-                                                    raft::identity_op);
-RAFT_INST(uint32_t, float, int);
-#undef RAFT_INST
-}  // namespace raft::neighbors::detail
diff --git a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_uint.cu b/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_uint.cu
deleted file mode 100644
index 87451c385a..0000000000
--- a/cpp/src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_uint.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/brute_force.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::detail {
-#define RAFT_INST(IdxT, T, IntT)                                                          \
-  template void brute_force_knn_impl<IntT, IdxT, T>(raft::device_resources const& handle, \
-                                                    std::vector<T*>& input,               \
-                                                    std::vector<IntT>& sizes,             \
-                                                    IntT D,                               \
-                                                    T* search_items,                      \
-                                                    IntT n,                               \
-                                                    IdxT* res_I,                          \
-                                                    T* res_D,                             \
-                                                    IntT k,                               \
-                                                    bool rowMajorIndex,                   \
-                                                    bool rowMajorQuery,                   \
-                                                    std::vector<IdxT>* translations,      \
-                                                    raft::distance::DistanceType metric,  \
-                                                    float metricArg,                      \
-                                                    raft::identity_op);
-RAFT_INST(uint32_t, float, unsigned int);
-#undef RAFT_INST
-}  // namespace raft::neighbors::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu
deleted file mode 100644
index 1a0322a722..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, float, true, false>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, float>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu
deleted file mode 100644
index c7b5c9ffe9..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, fp_8bit<5, true>, true, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, fp_8bit<5, true>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu
deleted file mode 100644
index efb2a477a7..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, fp_8bit<5, true>, false, true>(uint32_t,
-                                                                                  uint32_t)
-  -> compute_similarity_kernel_t<float, fp_8bit<5, true>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu
deleted file mode 100644
index b9051eb011..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, fp_8bit<5, true>, true, false>(uint32_t,
-                                                                                  uint32_t)
-  -> compute_similarity_kernel_t<float, fp_8bit<5, true>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu
deleted file mode 100644
index c6b1bad123..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, fp_8bit<5, false>, true, true>(uint32_t,
-                                                                                  uint32_t)
-  -> compute_similarity_kernel_t<float, fp_8bit<5, false>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu
deleted file mode 100644
index d6033345da..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, fp_8bit<5, false>, false, true>(uint32_t,
-                                                                                   uint32_t)
-  -> compute_similarity_kernel_t<float, fp_8bit<5, false>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu
deleted file mode 100644
index 1add18cb4a..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, fp_8bit<5, false>, true, false>(uint32_t,
-                                                                                   uint32_t)
-  -> compute_similarity_kernel_t<float, fp_8bit<5, false>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_fast.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_fast.cu
deleted file mode 100644
index 6020d7035b..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_fast.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, half, true, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, half>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu
deleted file mode 100644
index 62be67e1a9..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, half, false, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, half>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu
deleted file mode 100644
index 145312f334..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, half, true, false>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, half>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu
deleted file mode 100644
index c9365e1bb4..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, fp_8bit<5, true>, true, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<half, fp_8bit<5, true>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu
deleted file mode 100644
index d5c6934da2..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, fp_8bit<5, true>, false, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<half, fp_8bit<5, true>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu
deleted file mode 100644
index bac8c8706b..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, fp_8bit<5, true>, true, false>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<half, fp_8bit<5, true>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu
deleted file mode 100644
index 2809005dd0..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, fp_8bit<5, false>, true, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<half, fp_8bit<5, false>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu
deleted file mode 100644
index 015ef21a15..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, fp_8bit<5, false>, false, true>(uint32_t,
-                                                                                  uint32_t)
-  -> compute_similarity_kernel_t<half, fp_8bit<5, false>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu
deleted file mode 100644
index 0ac96c8440..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, fp_8bit<5, false>, true, false>(uint32_t,
-                                                                                  uint32_t)
-  -> compute_similarity_kernel_t<half, fp_8bit<5, false>>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu
deleted file mode 100644
index f3501d11c0..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, half, true, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<half, half>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu
deleted file mode 100644
index 7d10020480..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, half, false, true>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<half, half>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu
deleted file mode 100644
index 91ec2eca3e..0000000000
--- a/cpp/src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<half, half, true, false>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<half, half>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/detail/ivfpq_compute_similarity_float_half_no_smem_lut.cu b/cpp/src/neighbors/specializations/detail/ivfpq_compute_similarity_float_half_no_smem_lut.cu
deleted file mode 100644
index 145312f334..0000000000
--- a/cpp/src/neighbors/specializations/detail/ivfpq_compute_similarity_float_half_no_smem_lut.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/detail/ivf_pq_search.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-#include <cuda_fp16.h>
-
-namespace raft::neighbors::ivf_pq::detail {
-
-template auto get_compute_similarity_kernel<float, half, true, false>(uint32_t, uint32_t)
-  -> compute_similarity_kernel_t<float, half>;
-
-}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/src/neighbors/specializations/fused_l2_knn_int_float_false.cu b/cpp/src/neighbors/specializations/fused_l2_knn_int_float_false.cu
deleted file mode 100644
index 72fdac9526..0000000000
--- a/cpp/src/neighbors/specializations/fused_l2_knn_int_float_false.cu
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-template void fusedL2Knn<int, float, false>(size_t D,
-                                            int* out_inds,
-                                            float* out_dists,
-                                            const float* index,
-                                            const float* query,
-                                            size_t n_index_rows,
-                                            size_t n_query_rows,
-                                            int k,
-                                            bool rowMajorIndex,
-                                            bool rowMajorQuery,
-                                            cudaStream_t stream,
-                                            raft::distance::DistanceType metric);
-
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/neighbors/specializations/fused_l2_knn_int_float_true.cu b/cpp/src/neighbors/specializations/fused_l2_knn_int_float_true.cu
deleted file mode 100644
index c7616462fe..0000000000
--- a/cpp/src/neighbors/specializations/fused_l2_knn_int_float_true.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-template void fusedL2Knn<int, float, true>(size_t D,
-                                           int* out_inds,
-                                           float* out_dists,
-                                           const float* index,
-                                           const float* query,
-                                           size_t n_index_rows,
-                                           size_t n_query_rows,
-                                           int k,
-                                           bool rowMajorIndex,
-                                           bool rowMajorQuery,
-                                           cudaStream_t stream,
-                                           raft::distance::DistanceType metric);
-
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/neighbors/specializations/fused_l2_knn_long_float_false.cu b/cpp/src/neighbors/specializations/fused_l2_knn_long_float_false.cu
deleted file mode 100644
index 16bf058238..0000000000
--- a/cpp/src/neighbors/specializations/fused_l2_knn_long_float_false.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-template void fusedL2Knn<long, float, false>(size_t D,
-                                             long* out_inds,
-                                             float* out_dists,
-                                             const float* index,
-                                             const float* query,
-                                             size_t n_index_rows,
-                                             size_t n_query_rows,
-                                             int k,
-                                             bool rowMajorIndex,
-                                             bool rowMajorQuery,
-                                             cudaStream_t stream,
-                                             raft::distance::DistanceType metric);
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/neighbors/specializations/fused_l2_knn_long_float_true.cu b/cpp/src/neighbors/specializations/fused_l2_knn_long_float_true.cu
deleted file mode 100644
index 06cf55eae3..0000000000
--- a/cpp/src/neighbors/specializations/fused_l2_knn_long_float_true.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-namespace detail {
-
-template void fusedL2Knn<long, float, true>(size_t D,
-                                            long* out_inds,
-                                            float* out_dists,
-                                            const float* index,
-                                            const float* query,
-                                            size_t n_index_rows,
-                                            size_t n_query_rows,
-                                            int k,
-                                            bool rowMajorIndex,
-                                            bool rowMajorQuery,
-                                            cudaStream_t stream,
-                                            raft::distance::DistanceType metric);
-};  // namespace detail
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/neighbors/specializations/ivfflat_build_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_build_float_int64_t.cu
deleted file mode 100644
index 7082873d76..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_build_float_int64_t.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                               \
-  template auto build(raft::device_resources const& handle,                       \
-                      const index_params& params,                                 \
-                      raft::device_matrix_view<const T, IdxT, row_major> dataset) \
-    ->index<T, IdxT>;
-
-RAFT_MAKE_INSTANCE(float, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_build_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_build_int8_t_int64_t.cu
deleted file mode 100644
index ebc1a7fefa..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_build_int8_t_int64_t.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                               \
-  template auto build(raft::device_resources const& handle,                       \
-                      const index_params& params,                                 \
-                      raft::device_matrix_view<const T, IdxT, row_major> dataset) \
-    ->index<T, IdxT>;
-
-RAFT_MAKE_INSTANCE(int8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_build_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_build_uint8_t_int64_t.cu
deleted file mode 100644
index 870db6e97e..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_build_uint8_t_int64_t.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                               \
-  template auto build(raft::device_resources const& handle,                       \
-                      const index_params& params,                                 \
-                      raft::device_matrix_view<const T, IdxT, row_major> dataset) \
-    ->index<T, IdxT>;
-
-RAFT_MAKE_INSTANCE(uint8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_extend_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_extend_float_int64_t.cu
deleted file mode 100644
index 71af06ad71..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_extend_float_int64_t.cu
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                           \
-  template auto extend(raft::device_resources const& handle,                                  \
-                       raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-                       std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-                       const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
-    ->raft::neighbors::ivf_flat::index<T, IdxT>;                                              \
-                                                                                              \
-  template void extend(raft::device_resources const& handle,                                  \
-                       raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-                       std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-                       raft::neighbors::ivf_flat::index<T, IdxT>* idx);
-
-RAFT_MAKE_INSTANCE(float, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_extend_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_extend_int8_t_int64_t.cu
deleted file mode 100644
index bb7bb6e7eb..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_extend_int8_t_int64_t.cu
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                           \
-  template auto extend(raft::device_resources const& handle,                                  \
-                       raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-                       std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-                       const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
-    ->raft::neighbors::ivf_flat::index<T, IdxT>;                                              \
-                                                                                              \
-  template void extend(raft::device_resources const& handle,                                  \
-                       raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-                       std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-                       raft::neighbors::ivf_flat::index<T, IdxT>* idx);
-
-RAFT_MAKE_INSTANCE(int8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_extend_uint8_t_int64_t.cu
deleted file mode 100644
index 607b4b0913..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_extend_uint8_t_int64_t.cu
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                           \
-  template auto extend(raft::device_resources const& handle,                                  \
-                       raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-                       std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-                       const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
-    ->raft::neighbors::ivf_flat::index<T, IdxT>;                                              \
-                                                                                              \
-  template void extend(raft::device_resources const& handle,                                  \
-                       raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-                       std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-                       raft::neighbors::ivf_flat::index<T, IdxT>* idx);
-
-RAFT_MAKE_INSTANCE(uint8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_search_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_search_float_int64_t.cu
deleted file mode 100644
index dce7083139..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_search_float_int64_t.cu
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-// greppable-id-specializations-ivf-flat-search: The ivfflat_interleaved_scan
-// function is used in both raft::neighbors::ivf_flat::search and
-// raft::neighbors::detail::refine_device. To prevent a duplicate instantiation
-// of this function (which defines ~270 kernels) in the refine specializations,
-// an extern template definition is provided. To make sure
-// ivfflat_interleaved_scan is actually compiled here, we explicitly instantiate
-// it below. Please check related function calls after editing template
-// definition below. Search for `greppable-id-specializations-ivf-flat-search`
-// to find them.
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                          \
-  template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan< \
-    T,                                                                       \
-    typename raft::spatial::knn::detail::utils::config<T>::value_t,          \
-    IdxT>(const index<T, IdxT>& index,                                       \
-          const T* queries,                                                  \
-          const uint32_t* coarse_query_results,                              \
-          const uint32_t n_queries,                                          \
-          const raft::distance::DistanceType metric,                         \
-          const uint32_t n_probes,                                           \
-          const uint32_t k,                                                  \
-          const bool select_min,                                             \
-          IdxT* neighbors,                                                   \
-          float* distances,                                                  \
-          uint32_t& grid_dim_x,                                              \
-          rmm::cuda_stream_view stream);                                     \
-                                                                             \
-  template void search(raft::device_resources const&,                        \
-                       raft::neighbors::ivf_flat::search_params const&,      \
-                       const raft::neighbors::ivf_flat::index<T, IdxT>&,     \
-                       raft::device_matrix_view<const T, IdxT, row_major>,   \
-                       raft::device_matrix_view<IdxT, IdxT, row_major>,      \
-                       raft::device_matrix_view<float, IdxT, row_major>);
-
-RAFT_MAKE_INSTANCE(float, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_search_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_search_int8_t_int64_t.cu
deleted file mode 100644
index b03d878bae..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_search_int8_t_int64_t.cu
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                          \
-  template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan< \
-    T,                                                                       \
-    typename raft::spatial::knn::detail::utils::config<T>::value_t,          \
-    IdxT>(const index<T, IdxT>& index,                                       \
-          const T* queries,                                                  \
-          const uint32_t* coarse_query_results,                              \
-          const uint32_t n_queries,                                          \
-          const raft::distance::DistanceType metric,                         \
-          const uint32_t n_probes,                                           \
-          const uint32_t k,                                                  \
-          const bool select_min,                                             \
-          IdxT* neighbors,                                                   \
-          float* distances,                                                  \
-          uint32_t& grid_dim_x,                                              \
-          rmm::cuda_stream_view stream);                                     \
-                                                                             \
-  template void search(raft::device_resources const&,                        \
-                       raft::neighbors::ivf_flat::search_params const&,      \
-                       const raft::neighbors::ivf_flat::index<T, IdxT>&,     \
-                       raft::device_matrix_view<const T, IdxT, row_major>,   \
-                       raft::device_matrix_view<IdxT, IdxT, row_major>,      \
-                       raft::device_matrix_view<float, IdxT, row_major>);
-
-RAFT_MAKE_INSTANCE(int8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfflat_search_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfflat_search_uint8_t_int64_t.cu
deleted file mode 100644
index 2d42bae0d1..0000000000
--- a/cpp/src/neighbors/specializations/ivfflat_search_uint8_t_int64_t.cu
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_flat {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                          \
-  template void raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan< \
-    T,                                                                       \
-    typename raft::spatial::knn::detail::utils::config<T>::value_t,          \
-    IdxT>(const index<T, IdxT>& index,                                       \
-          const T* queries,                                                  \
-          const uint32_t* coarse_query_results,                              \
-          const uint32_t n_queries,                                          \
-          const raft::distance::DistanceType metric,                         \
-          const uint32_t n_probes,                                           \
-          const uint32_t k,                                                  \
-          const bool select_min,                                             \
-          IdxT* neighbors,                                                   \
-          float* distances,                                                  \
-          uint32_t& grid_dim_x,                                              \
-          rmm::cuda_stream_view stream);                                     \
-                                                                             \
-  template void search(raft::device_resources const&,                        \
-                       raft::neighbors::ivf_flat::search_params const&,      \
-                       const raft::neighbors::ivf_flat::index<T, IdxT>&,     \
-                       raft::device_matrix_view<const T, IdxT, row_major>,   \
-                       raft::device_matrix_view<IdxT, IdxT, row_major>,      \
-                       raft::device_matrix_view<float, IdxT, row_major>);
-
-RAFT_MAKE_INSTANCE(uint8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_flat
diff --git a/cpp/src/neighbors/specializations/ivfpq_build_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_build_float_int64_t.cu
deleted file mode 100644
index d559291b93..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_build_float_int64_t.cu
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                        \
-  template auto build<T, IdxT>(raft::device_resources const& handle,                       \
-                               const index_params& params,                                 \
-                               raft::device_matrix_view<const T, IdxT, row_major> dataset) \
-    ->index<IdxT>;
-
-RAFT_MAKE_INSTANCE(float, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu
deleted file mode 100644
index c8b31e1fff..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                        \
-  template auto build<T, IdxT>(raft::device_resources const& handle,                       \
-                               const index_params& params,                                 \
-                               raft::device_matrix_view<const T, IdxT, row_major> dataset) \
-    ->index<IdxT>;
-
-RAFT_MAKE_INSTANCE(int8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu
deleted file mode 100644
index 5fc62969f0..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                        \
-  template auto build<T, IdxT>(raft::device_resources const& handle,                       \
-                               const index_params& params,                                 \
-                               raft::device_matrix_view<const T, IdxT, row_major> dataset) \
-    ->index<IdxT>;
-
-RAFT_MAKE_INSTANCE(uint8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
deleted file mode 100644
index 584bbfc45c..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                        \
-  template auto extend<T, IdxT>(                                           \
-    raft::device_resources const& handle,                                  \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-    const index<IdxT>& idx)                                                \
-    ->index<IdxT>;                                                         \
-  template void extend<T, IdxT>(                                           \
-    raft::device_resources const& handle,                                  \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-    index<IdxT>* idx);
-
-RAFT_MAKE_INSTANCE(float, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
deleted file mode 100644
index 00311a77e4..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                        \
-  template auto extend<T, IdxT>(                                           \
-    raft::device_resources const& handle,                                  \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-    const index<IdxT>& idx)                                                \
-    ->index<IdxT>;                                                         \
-  template void extend<T, IdxT>(                                           \
-    raft::device_resources const& handle,                                  \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-    index<IdxT>* idx);
-
-RAFT_MAKE_INSTANCE(int8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
deleted file mode 100644
index 11524886f0..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                        \
-  template auto extend<T, IdxT>(                                           \
-    raft::device_resources const& handle,                                  \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-    const index<IdxT>& idx)                                                \
-    ->index<IdxT>;                                                         \
-  template void extend<T, IdxT>(                                           \
-    raft::device_resources const& handle,                                  \
-    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
-    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-    index<IdxT>* idx);
-
-RAFT_MAKE_INSTANCE(uint8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_search_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_search_float_int64_t.cu
deleted file mode 100644
index 92a4d89e6b..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_search_float_int64_t.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                         \
-  template void search<T, IdxT>(raft::device_resources const& handle,                       \
-                                const search_params& params,                                \
-                                const index<IdxT>& idx,                                     \
-                                raft::device_matrix_view<const T, IdxT, row_major> queries, \
-                                raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,  \
-                                raft::device_matrix_view<float, IdxT, row_major> distances);
-
-RAFT_MAKE_INSTANCE(float, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu
deleted file mode 100644
index 62a8b48ad5..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                         \
-  template void search<T, IdxT>(raft::device_resources const& handle,                       \
-                                const search_params& params,                                \
-                                const index<IdxT>& idx,                                     \
-                                raft::device_matrix_view<const T, IdxT, row_major> queries, \
-                                raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,  \
-                                raft::device_matrix_view<float, IdxT, row_major> distances);
-
-RAFT_MAKE_INSTANCE(int8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu
deleted file mode 100644
index 3bcf134a22..0000000000
--- a/cpp/src/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors::ivf_pq {
-
-#define RAFT_MAKE_INSTANCE(T, IdxT)                                                         \
-  template void search<T, IdxT>(raft::device_resources const& handle,                       \
-                                const search_params& params,                                \
-                                const index<IdxT>& idx,                                     \
-                                raft::device_matrix_view<const T, IdxT, row_major> queries, \
-                                raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,  \
-                                raft::device_matrix_view<float, IdxT, row_major> distances);
-
-RAFT_MAKE_INSTANCE(uint8_t, int64_t);
-
-#undef RAFT_MAKE_INSTANCE
-
-}  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/specializations/refine_d_int64_t_float.cu b/cpp/src/neighbors/specializations/refine_d_int64_t_float.cu
deleted file mode 100644
index 0b0125459d..0000000000
--- a/cpp/src/neighbors/specializations/refine_d_int64_t_float.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors {
-
-template void refine<int64_t, float, float, int64_t>(
-  raft::device_resources const& handle,
-  raft::device_matrix_view<const float, int64_t, row_major> dataset,
-  raft::device_matrix_view<const float, int64_t, row_major> queries,
-  raft::device_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
-  raft::device_matrix_view<int64_t, int64_t, row_major> indices,
-  raft::device_matrix_view<float, int64_t, row_major> distances,
-  distance::DistanceType metric);
-
-}  // namespace raft::neighbors
diff --git a/cpp/src/neighbors/specializations/refine_d_int64_t_int8_t.cu b/cpp/src/neighbors/specializations/refine_d_int64_t_int8_t.cu
deleted file mode 100644
index d6c817b971..0000000000
--- a/cpp/src/neighbors/specializations/refine_d_int64_t_int8_t.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors {
-
-template void refine<int64_t, int8_t, float, int64_t>(
-  raft::device_resources const& handle,
-  raft::device_matrix_view<const int8_t, int64_t, row_major> dataset,
-  raft::device_matrix_view<const int8_t, int64_t, row_major> queries,
-  raft::device_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
-  raft::device_matrix_view<int64_t, int64_t, row_major> indices,
-  raft::device_matrix_view<float, int64_t, row_major> distances,
-  distance::DistanceType metric);
-
-}  // namespace raft::neighbors
diff --git a/cpp/src/neighbors/specializations/refine_d_int64_t_uint8_t.cu b/cpp/src/neighbors/specializations/refine_d_int64_t_uint8_t.cu
deleted file mode 100644
index 3e0ca627a6..0000000000
--- a/cpp/src/neighbors/specializations/refine_d_int64_t_uint8_t.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors {
-
-template void refine<int64_t, uint8_t, float, int64_t>(
-  raft::device_resources const& handle,
-  raft::device_matrix_view<const uint8_t, int64_t, row_major> dataset,
-  raft::device_matrix_view<const uint8_t, int64_t, row_major> queries,
-  raft::device_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
-  raft::device_matrix_view<int64_t, int64_t, row_major> indices,
-  raft::device_matrix_view<float, int64_t, row_major> distances,
-  distance::DistanceType metric);
-
-}  // namespace raft::neighbors
diff --git a/cpp/src/neighbors/specializations/refine_h_int64_t_float.cu b/cpp/src/neighbors/specializations/refine_h_int64_t_float.cu
deleted file mode 100644
index 66a6bace53..0000000000
--- a/cpp/src/neighbors/specializations/refine_h_int64_t_float.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors {
-
-template void refine<int64_t, float, float, int64_t>(
-  raft::device_resources const& handle,
-  raft::host_matrix_view<const float, int64_t, row_major> dataset,
-  raft::host_matrix_view<const float, int64_t, row_major> queries,
-  raft::host_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
-  raft::host_matrix_view<int64_t, int64_t, row_major> indices,
-  raft::host_matrix_view<float, int64_t, row_major> distances,
-  distance::DistanceType metric);
-
-}  // namespace raft::neighbors
diff --git a/cpp/src/neighbors/specializations/refine_h_int64_t_int8_t.cu b/cpp/src/neighbors/specializations/refine_h_int64_t_int8_t.cu
deleted file mode 100644
index 22824b3a8e..0000000000
--- a/cpp/src/neighbors/specializations/refine_h_int64_t_int8_t.cu
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors {
-template void refine<int64_t, int8_t, float, int64_t>(
-  raft::device_resources const& handle,
-  raft::host_matrix_view<const int8_t, int64_t, row_major> dataset,
-  raft::host_matrix_view<const int8_t, int64_t, row_major> queries,
-  raft::host_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
-  raft::host_matrix_view<int64_t, int64_t, row_major> indices,
-  raft::host_matrix_view<float, int64_t, row_major> distances,
-  distance::DistanceType metric);
-
-}  // namespace raft::neighbors
diff --git a/cpp/src/neighbors/specializations/refine_h_int64_t_uint8_t.cu b/cpp/src/neighbors/specializations/refine_h_int64_t_uint8_t.cu
deleted file mode 100644
index 58dcfc87c9..0000000000
--- a/cpp/src/neighbors/specializations/refine_h_int64_t_uint8_t.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
-
-namespace raft::neighbors {
-
-template void refine<int64_t, uint8_t, float, int64_t>(
-  raft::device_resources const& handle,
-  raft::host_matrix_view<const uint8_t, int64_t, row_major> dataset,
-  raft::host_matrix_view<const uint8_t, int64_t, row_major> queries,
-  raft::host_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
-  raft::host_matrix_view<int64_t, int64_t, row_major> indices,
-  raft::host_matrix_view<float, int64_t, row_major> distances,
-  distance::DistanceType metric);
-
-}  // namespace raft::neighbors
diff --git a/cpp/src/nn/specializations/brute_force_knn_long_float_int.cu b/cpp/src/nn/specializations/brute_force_knn_long_float_int.cu
deleted file mode 100644
index 2c21d1ec64..0000000000
--- a/cpp/src/nn/specializations/brute_force_knn_long_float_int.cu
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-
-template void brute_force_knn<long, float, int>(raft::device_resources const& handle,
-                                                std::vector<float*>& input,
-                                                std::vector<int>& sizes,
-                                                int D,
-                                                float* search_items,
-                                                int n,
-                                                long* res_I,
-                                                float* res_D,
-                                                int k,
-                                                bool rowMajorIndex,
-                                                bool rowMajorQuery,
-                                                std::vector<long>* translations,
-                                                distance::DistanceType metric,
-                                                float metric_arg);
-
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/nn/specializations/brute_force_knn_long_float_uint.cu b/cpp/src/nn/specializations/brute_force_knn_long_float_uint.cu
deleted file mode 100644
index 7e6e7e80d0..0000000000
--- a/cpp/src/nn/specializations/brute_force_knn_long_float_uint.cu
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-
-template void brute_force_knn<long, float, unsigned int>(raft::device_resources const& handle,
-                                                         std::vector<float*>& input,
-                                                         std::vector<unsigned int>& sizes,
-                                                         unsigned int D,
-                                                         float* search_items,
-                                                         unsigned int n,
-                                                         long* res_I,
-                                                         float* res_D,
-                                                         unsigned int k,
-                                                         bool rowMajorIndex,
-                                                         bool rowMajorQuery,
-                                                         std::vector<long>* translations,
-                                                         distance::DistanceType metric,
-                                                         float metric_arg);
-
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/nn/specializations/brute_force_knn_uint32_t_float_int.cu b/cpp/src/nn/specializations/brute_force_knn_uint32_t_float_int.cu
deleted file mode 100644
index e94c12d579..0000000000
--- a/cpp/src/nn/specializations/brute_force_knn_uint32_t_float_int.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-template void brute_force_knn<uint32_t, float, int>(raft::device_resources const& handle,
-                                                    std::vector<float*>& input,
-                                                    std::vector<int>& sizes,
-                                                    int D,
-                                                    float* search_items,
-                                                    int n,
-                                                    uint32_t* res_I,
-                                                    float* res_D,
-                                                    int k,
-                                                    bool rowMajorIndex,
-                                                    bool rowMajorQuery,
-                                                    std::vector<uint32_t>* translations,
-                                                    distance::DistanceType metric,
-                                                    float metric_arg);
-
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/nn/specializations/brute_force_knn_uint32_t_float_uint.cu b/cpp/src/nn/specializations/brute_force_knn_uint32_t_float_uint.cu
deleted file mode 100644
index 95cf8a1eb3..0000000000
--- a/cpp/src/nn/specializations/brute_force_knn_uint32_t_float_uint.cu
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cstdint>
-#include <raft/neighbors/specializations.cuh>
-#include <raft/spatial/knn/knn.cuh>
-
-namespace raft {
-namespace spatial {
-namespace knn {
-
-template void brute_force_knn<uint32_t, float, unsigned int>(raft::device_resources const& handle,
-                                                             std::vector<float*>& input,
-                                                             std::vector<unsigned int>& sizes,
-                                                             unsigned int D,
-                                                             float* search_items,
-                                                             unsigned int n,
-                                                             uint32_t* res_I,
-                                                             float* res_D,
-                                                             unsigned int k,
-                                                             bool rowMajorIndex,
-                                                             bool rowMajorQuery,
-                                                             std::vector<uint32_t>* translations,
-                                                             distance::DistanceType metric,
-                                                             float metric_arg);
-
-};  // namespace knn
-};  // namespace spatial
-};  // namespace raft
diff --git a/cpp/src/cluster/cluster_cost.cuh b/cpp/src/raft_runtime/cluster/cluster_cost.cuh
similarity index 100%
rename from cpp/src/cluster/cluster_cost.cuh
rename to cpp/src/raft_runtime/cluster/cluster_cost.cuh
diff --git a/cpp/src/cluster/cluster_cost_double.cu b/cpp/src/raft_runtime/cluster/cluster_cost_double.cu
similarity index 96%
rename from cpp/src/cluster/cluster_cost_double.cu
rename to cpp/src/raft_runtime/cluster/cluster_cost_double.cu
index 2244ba4ed3..b6df92c839 100644
--- a/cpp/src/cluster/cluster_cost_double.cu
+++ b/cpp/src/raft_runtime/cluster/cluster_cost_double.cu
@@ -15,7 +15,6 @@
  */
 
 #include "cluster_cost.cuh"
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
diff --git a/cpp/src/cluster/cluster_cost_float.cu b/cpp/src/raft_runtime/cluster/cluster_cost_float.cu
similarity index 96%
rename from cpp/src/cluster/cluster_cost_float.cu
rename to cpp/src/raft_runtime/cluster/cluster_cost_float.cu
index 4164265b55..2c26b69984 100644
--- a/cpp/src/cluster/cluster_cost_float.cu
+++ b/cpp/src/raft_runtime/cluster/cluster_cost_float.cu
@@ -15,7 +15,6 @@
  */
 
 #include "cluster_cost.cuh"
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
diff --git a/cpp/src/cluster/kmeans_fit_double.cu b/cpp/src/raft_runtime/cluster/kmeans_fit_double.cu
similarity index 96%
rename from cpp/src/cluster/kmeans_fit_double.cu
rename to cpp/src/raft_runtime/cluster/kmeans_fit_double.cu
index 12f4fba318..0b8b458042 100644
--- a/cpp/src/cluster/kmeans_fit_double.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_fit_double.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
diff --git a/cpp/src/cluster/kmeans_fit_float.cu b/cpp/src/raft_runtime/cluster/kmeans_fit_float.cu
similarity index 96%
rename from cpp/src/cluster/kmeans_fit_float.cu
rename to cpp/src/raft_runtime/cluster/kmeans_fit_float.cu
index 48505dcc3e..a2831c2cf0 100644
--- a/cpp/src/cluster/kmeans_fit_float.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_fit_float.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
diff --git a/cpp/src/cluster/kmeans_init_plus_plus_double.cu b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
similarity index 96%
rename from cpp/src/cluster/kmeans_init_plus_plus_double.cu
rename to cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
index 5bb0835595..d2ec26f882 100644
--- a/cpp/src/cluster/kmeans_init_plus_plus_double.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
diff --git a/cpp/src/cluster/kmeans_init_plus_plus_float.cu b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
similarity index 96%
rename from cpp/src/cluster/kmeans_init_plus_plus_float.cu
rename to cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
index f211afd06e..bacab3b7d6 100644
--- a/cpp/src/cluster/kmeans_init_plus_plus_float.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
diff --git a/cpp/src/cluster/update_centroids.cuh b/cpp/src/raft_runtime/cluster/update_centroids.cuh
similarity index 98%
rename from cpp/src/cluster/update_centroids.cuh
rename to cpp/src/raft_runtime/cluster/update_centroids.cuh
index 7c13252384..de219329df 100644
--- a/cpp/src/cluster/update_centroids.cuh
+++ b/cpp/src/raft_runtime/cluster/update_centroids.cuh
@@ -15,7 +15,6 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/norm.cuh>
diff --git a/cpp/src/cluster/update_centroids_double.cu b/cpp/src/raft_runtime/cluster/update_centroids_double.cu
similarity index 97%
rename from cpp/src/cluster/update_centroids_double.cu
rename to cpp/src/raft_runtime/cluster/update_centroids_double.cu
index 0f38c7dd53..d967c503ff 100644
--- a/cpp/src/cluster/update_centroids_double.cu
+++ b/cpp/src/raft_runtime/cluster/update_centroids_double.cu
@@ -15,7 +15,6 @@
  */
 
 #include "update_centroids.cuh"
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
diff --git a/cpp/src/cluster/update_centroids_float.cu b/cpp/src/raft_runtime/cluster/update_centroids_float.cu
similarity index 97%
rename from cpp/src/cluster/update_centroids_float.cu
rename to cpp/src/raft_runtime/cluster/update_centroids_float.cu
index 8f0e79b438..b141a1ef20 100644
--- a/cpp/src/cluster/update_centroids_float.cu
+++ b/cpp/src/raft_runtime/cluster/update_centroids_float.cu
@@ -15,7 +15,6 @@
  */
 
 #include "update_centroids.cuh"
-#include <raft/cluster/specializations.cuh>
 #include <raft/core/device_resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
diff --git a/cpp/src/distance/fused_l2_min_arg.cu b/cpp/src/raft_runtime/distance/fused_l2_min_arg.cu
similarity index 97%
rename from cpp/src/distance/fused_l2_min_arg.cu
rename to cpp/src/raft_runtime/distance/fused_l2_min_arg.cu
index b682446cc2..bec71ae698 100644
--- a/cpp/src/distance/fused_l2_min_arg.cu
+++ b/cpp/src/raft_runtime/distance/fused_l2_min_arg.cu
@@ -19,7 +19,7 @@
 #include <raft/core/kvp.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
-#include <raft/distance/specializations.cuh>
+#include <raft/linalg/norm.cuh>
 #include <thrust/for_each.h>
 #include <thrust/tuple.h>
 
@@ -95,4 +95,4 @@ void fused_l2_nn_min_arg(raft::device_resources const& handle,
   compute_fused_l2_nn_min_arg<double, int>(handle, min, x, y, m, n, k, sqrt);
 }
 
-}  // end namespace raft::runtime::distance
\ No newline at end of file
+}  // end namespace raft::runtime::distance
diff --git a/cpp/src/distance/pairwise_distance.cu b/cpp/src/raft_runtime/distance/pairwise_distance.cu
similarity index 97%
rename from cpp/src/distance/pairwise_distance.cu
rename to cpp/src/raft_runtime/distance/pairwise_distance.cu
index dfdfa553e9..62597a4799 100644
--- a/cpp/src/distance/pairwise_distance.cu
+++ b/cpp/src/raft_runtime/distance/pairwise_distance.cu
@@ -17,7 +17,6 @@
 #include <raft/core/device_resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
-#include <raft/distance/specializations.cuh>
 
 namespace raft::runtime::distance {
 
diff --git a/cpp/src/matrix/select_k_float_int64_t.cu b/cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu
similarity index 96%
rename from cpp/src/matrix/select_k_float_int64_t.cu
rename to cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu
index 309ac50c6b..8814a8aafc 100644
--- a/cpp/src/matrix/select_k_float_int64_t.cu
+++ b/cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu
@@ -17,7 +17,6 @@
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/device_resources.hpp>
 #include <raft/matrix/select_k.cuh>
-#include <raft/matrix/specializations.cuh>
 
 #include <raft_runtime/matrix/select_k.hpp>
 
diff --git a/cpp/src/neighbors/brute_force_knn_int64_t_float.cu b/cpp/src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
similarity index 97%
rename from cpp/src/neighbors/brute_force_knn_int64_t_float.cu
rename to cpp/src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
index 88545b3607..ea6002eab0 100644
--- a/cpp/src/neighbors/brute_force_knn_int64_t_float.cu
+++ b/cpp/src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
@@ -18,8 +18,6 @@
 #include <raft/core/device_resources.hpp>
 #include <raft/neighbors/brute_force.cuh>
 
-#include <raft/neighbors/specializations.cuh>
-
 #include <raft_runtime/neighbors/brute_force.hpp>
 
 #include <vector>
diff --git a/cpp/src/neighbors/ivf_flat_build.cu b/cpp/src/raft_runtime/neighbors/ivf_flat_build.cu
similarity index 98%
rename from cpp/src/neighbors/ivf_flat_build.cu
rename to cpp/src/raft_runtime/neighbors/ivf_flat_build.cu
index 0d82fdbb08..48a40ab56e 100644
--- a/cpp/src/neighbors/ivf_flat_build.cu
+++ b/cpp/src/raft_runtime/neighbors/ivf_flat_build.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <raft/neighbors/specializations.cuh>
+#include <raft/neighbors/ivf_flat.cuh>
 #include <raft_runtime/neighbors/ivf_flat.hpp>
 
 namespace raft::runtime::neighbors::ivf_flat {
diff --git a/cpp/src/neighbors/ivf_flat_search.cu b/cpp/src/raft_runtime/neighbors/ivf_flat_search.cu
similarity index 97%
rename from cpp/src/neighbors/ivf_flat_search.cu
rename to cpp/src/raft_runtime/neighbors/ivf_flat_search.cu
index b843ee7c30..eefc7f2932 100644
--- a/cpp/src/neighbors/ivf_flat_search.cu
+++ b/cpp/src/raft_runtime/neighbors/ivf_flat_search.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <raft/neighbors/specializations.cuh>
+#include <raft/neighbors/ivf_flat.cuh>
 #include <raft_runtime/neighbors/ivf_flat.hpp>
 
 namespace raft::runtime::neighbors::ivf_flat {
diff --git a/cpp/src/neighbors/ivfpq_build.cu b/cpp/src/raft_runtime/neighbors/ivfpq_build.cu
similarity index 98%
rename from cpp/src/neighbors/ivfpq_build.cu
rename to cpp/src/raft_runtime/neighbors/ivfpq_build.cu
index 7f91e34969..5bfb546060 100644
--- a/cpp/src/neighbors/ivfpq_build.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_build.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
 #include <raft_runtime/neighbors/ivf_pq.hpp>
 
 namespace raft::runtime::neighbors::ivf_pq {
diff --git a/cpp/src/neighbors/ivfpq_deserialize.cu b/cpp/src/raft_runtime/neighbors/ivfpq_deserialize.cu
similarity index 95%
rename from cpp/src/neighbors/ivfpq_deserialize.cu
rename to cpp/src/raft_runtime/neighbors/ivfpq_deserialize.cu
index 8d54e3cc55..45b731fdcf 100644
--- a/cpp/src/neighbors/ivfpq_deserialize.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_deserialize.cu
@@ -15,7 +15,7 @@
  */
 
 #include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
+#include <raft/neighbors/ivf_pq_serialize.cuh>
 
 #include <raft_runtime/neighbors/ivf_pq.hpp>
 
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu b/cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
new file mode 100644
index 0000000000..d55d726671
--- /dev/null
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq.cuh>
+
+#include <raft_runtime/neighbors/ivf_pq.hpp>
+
+namespace raft::runtime::neighbors::ivf_pq {
+
+#define RAFT_SEARCH_INST(T, IdxT)                                                                 \
+  void search(raft::device_resources const& handle,                                               \
+              const raft::neighbors::ivf_pq::search_params& params,                               \
+              const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
+              raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
+              raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,                          \
+              raft::device_matrix_view<float, IdxT, row_major> distances)                         \
+  {                                                                                               \
+    raft::neighbors::ivf_pq::search<T, IdxT>(handle, params, idx, queries, neighbors, distances); \
+  }
+// Non-template search() overload forwarding to ivf_pq::search<float, int64_t>.
+RAFT_SEARCH_INST(float, int64_t);
+
+#undef RAFT_SEARCH_INST
+
+}  // namespace raft::runtime::neighbors::ivf_pq
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu b/cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
new file mode 100644
index 0000000000..b73cbc0751
--- /dev/null
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq.cuh>
+
+#include <raft_runtime/neighbors/ivf_pq.hpp>
+
+namespace raft::runtime::neighbors::ivf_pq {
+
+#define RAFT_SEARCH_INST(T, IdxT)                                                                 \
+  void search(raft::device_resources const& handle,                                               \
+              const raft::neighbors::ivf_pq::search_params& params,                               \
+              const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
+              raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
+              raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,                          \
+              raft::device_matrix_view<float, IdxT, row_major> distances)                         \
+  {                                                                                               \
+    raft::neighbors::ivf_pq::search<T, IdxT>(handle, params, idx, queries, neighbors, distances); \
+  }
+// Non-template search() overload forwarding to ivf_pq::search<int8_t, int64_t>.
+RAFT_SEARCH_INST(int8_t, int64_t);
+
+#undef RAFT_SEARCH_INST
+
+}  // namespace raft::runtime::neighbors::ivf_pq
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu b/cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..2b3dfe585d
--- /dev/null
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/neighbors/ivf_pq.cuh>
+
+#include <raft_runtime/neighbors/ivf_pq.hpp>
+
+namespace raft::runtime::neighbors::ivf_pq {
+
+#define RAFT_SEARCH_INST(T, IdxT)                                                                 \
+  void search(raft::device_resources const& handle,                                               \
+              const raft::neighbors::ivf_pq::search_params& params,                               \
+              const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
+              raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
+              raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,                          \
+              raft::device_matrix_view<float, IdxT, row_major> distances)                         \
+  {                                                                                               \
+    raft::neighbors::ivf_pq::search<T, IdxT>(handle, params, idx, queries, neighbors, distances); \
+  }
+// Define search() for T = uint8_t, IdxT = int64_t; it forwards to raft::neighbors::ivf_pq::search.
+RAFT_SEARCH_INST(uint8_t, int64_t);
+
+#undef RAFT_SEARCH_INST
+
+}  // namespace raft::runtime::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/ivfpq_serialize.cu b/cpp/src/raft_runtime/neighbors/ivfpq_serialize.cu
similarity index 95%
rename from cpp/src/neighbors/ivfpq_serialize.cu
rename to cpp/src/raft_runtime/neighbors/ivfpq_serialize.cu
index e251f1442f..21bd221c45 100644
--- a/cpp/src/neighbors/ivfpq_serialize.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_serialize.cu
@@ -15,7 +15,7 @@
  */
 
 #include <raft/neighbors/ivf_pq.cuh>
-#include <raft/neighbors/specializations.cuh>
+#include <raft/neighbors/ivf_pq_serialize.cuh>
 
 #include <raft_runtime/neighbors/ivf_pq.hpp>
 
diff --git a/cpp/src/neighbors/refine_d_int64_t_float.cu b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_float.cu
similarity index 96%
rename from cpp/src/neighbors/refine_d_int64_t_float.cu
rename to cpp/src/raft_runtime/neighbors/refine_d_int64_t_float.cu
index 8ad8f9e8f1..79cec55294 100644
--- a/cpp/src/neighbors/refine_d_int64_t_float.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_float.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
 
 namespace raft::runtime::neighbors {
 
diff --git a/cpp/src/neighbors/refine_d_int64_t_int8_t.cu b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
similarity index 96%
rename from cpp/src/neighbors/refine_d_int64_t_int8_t.cu
rename to cpp/src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
index 817369ed6a..f8a7a8c9c8 100644
--- a/cpp/src/neighbors/refine_d_int64_t_int8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
 
 namespace raft::runtime::neighbors {
 
diff --git a/cpp/src/neighbors/refine_d_int64_t_uint8_t.cu b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
similarity index 96%
rename from cpp/src/neighbors/refine_d_int64_t_uint8_t.cu
rename to cpp/src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
index fb426b2c02..8f68f9f88e 100644
--- a/cpp/src/neighbors/refine_d_int64_t_uint8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
 
 namespace raft::runtime::neighbors {
 
diff --git a/cpp/src/neighbors/refine_h_int64_t_float.cu b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_float.cu
similarity index 96%
rename from cpp/src/neighbors/refine_h_int64_t_float.cu
rename to cpp/src/raft_runtime/neighbors/refine_h_int64_t_float.cu
index 1f950dc3b6..7f19d44700 100644
--- a/cpp/src/neighbors/refine_h_int64_t_float.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_float.cu
@@ -16,7 +16,6 @@
  */
 
 #include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
 
 namespace raft::runtime::neighbors {
 
diff --git a/cpp/src/neighbors/refine_h_int64_t_int8_t.cu b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
similarity index 96%
rename from cpp/src/neighbors/refine_h_int64_t_int8_t.cu
rename to cpp/src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
index da99df3618..bd21c6b198 100644
--- a/cpp/src/neighbors/refine_h_int64_t_int8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
 
 namespace raft::runtime::neighbors {
 
diff --git a/cpp/src/neighbors/refine_h_int64_t_uint8_t.cu b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
similarity index 96%
rename from cpp/src/neighbors/refine_h_int64_t_uint8_t.cu
rename to cpp/src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
index 990754b033..f10d01cc09 100644
--- a/cpp/src/neighbors/refine_h_int64_t_uint8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
@@ -15,7 +15,6 @@
  */
 
 #include <raft/neighbors/refine.cuh>
-#include <raft/neighbors/specializations.cuh>
 
 namespace raft::runtime::neighbors {
 
diff --git a/cpp/src/random/common.cuh b/cpp/src/raft_runtime/random/common.cuh
similarity index 100%
rename from cpp/src/random/common.cuh
rename to cpp/src/raft_runtime/random/common.cuh
diff --git a/cpp/src/random/rmat_rectangular_generator_int64_double.cu b/cpp/src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu
similarity index 100%
rename from cpp/src/random/rmat_rectangular_generator_int64_double.cu
rename to cpp/src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu
diff --git a/cpp/src/random/rmat_rectangular_generator_int64_float.cu b/cpp/src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
similarity index 100%
rename from cpp/src/random/rmat_rectangular_generator_int64_float.cu
rename to cpp/src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
diff --git a/cpp/src/random/rmat_rectangular_generator_int_double.cu b/cpp/src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
similarity index 100%
rename from cpp/src/random/rmat_rectangular_generator_int_double.cu
rename to cpp/src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
diff --git a/cpp/src/random/rmat_rectangular_generator_int_float.cu b/cpp/src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
similarity index 100%
rename from cpp/src/random/rmat_rectangular_generator_int_float.cu
rename to cpp/src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers.cu b/cpp/src/spatial/knn/detail/ball_cover/registers.cu
new file mode 100644
index 0000000000..0bb6d123a9
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers.cu
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims)                                                   \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    raft::spatial::knn::detail::DistFunc<Mvalue_t, Mvalue_int>& dfunc,                       \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Both macros expand to an explicit instantiation; this one targets rbc_low_dim_pass_two.
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims)                                                   \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    raft::spatial::knn::detail::DistFunc<Mvalue_t, Mvalue_int>& dfunc,                       \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Explicit instantiations of both passes for Mdims = 2 and 3, with dfunc fixed to DistFunc.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(std::int64_t, float, std::uint32_t, 2);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(std::int64_t, float, std::uint32_t, 3);
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(std::int64_t, float, std::uint32_t, 2);
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(std::int64_t, float, std::uint32_t, 3);
+// NOTE(review): registers_pass_*_dist.cu repeat these instantiations; confirm that is intended.
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py b/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py
new file mode 100644
index 0000000000..f8ce27728b
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+header = """/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // int64_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+"""
+
+# Macro text written after the header. The backslash after the opening quotes drops the
+# leading newline so only one blank line separates the #include block from the #define.
+macro_pass_one = """\
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \\
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \\
+  template void                                                                              \\
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \\
+    raft::device_resources const& handle,                                                    \\
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \\
+    const Mvalue_t* query,                                                                   \\
+    const Mvalue_int n_query_rows,                                                           \\
+    Mvalue_int k,                                                                            \\
+    const Mvalue_idx* R_knn_inds,                                                            \\
+    const Mvalue_t* R_knn_dists,                                                             \\
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \\
+    Mvalue_idx* inds,                                                                        \\
+    Mvalue_t* dists,                                                                         \\
+    float weight,                                                                            \\
+    Mvalue_int* dists_counter)
+
+"""
+
+macro_pass_two = """\
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \\
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \\
+  template void                                                                              \\
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \\
+    raft::device_resources const& handle,                                                    \\
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \\
+    const Mvalue_t* query,                                                                   \\
+    const Mvalue_int n_query_rows,                                                           \\
+    Mvalue_int k,                                                                            \\
+    const Mvalue_idx* R_knn_inds,                                                            \\
+    const Mvalue_t* R_knn_dists,                                                             \\
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \\
+    Mvalue_idx* inds,                                                                        \\
+    Mvalue_t* dists,                                                                         \\
+    float weight,                                                                            \\
+    Mvalue_int* dists_counter)
+
+"""
+
+# Output-file suffix -> distance functor template substituted for Mdist_func.
+distances = dict(
+    haversine="raft::spatial::knn::detail::HaversineFunc",
+    euclidean="raft::spatial::knn::detail::EuclideanFunc",
+    dist="raft::spatial::knn::detail::DistFunc",
+)
+
+# (pass name, macro text) pairs; all pass-one files are written before pass-two files.
+passes = (("one", macro_pass_one), ("two", macro_pass_two))
+
+# Write one .cu file per (pass, distance, dims) combination into the current working
+# directory and print its path prefixed with src/spatial/knn/detail/ball_cover/.
+for pass_name, macro in passes:
+    # Name of the macro defined by this pass's macro text.
+    inst = f"instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_{pass_name}"
+    for k, v in distances.items():
+        for dim in [2, 3]:
+            path = f"registers_pass_{pass_name}_{dim}d_{k}.cu"
+            with open(path, "w") as f:
+                f.write(header)
+                f.write(macro)
+                # Invoke the macro just written, then undefine it again.
+                f.write(f"{inst}(\n")
+                f.write(f"  std::int64_t, float, std::uint32_t, {dim}, {v});\n")
+                f.write(f"#undef {inst}\n")
+            print(f"src/spatial/knn/detail/ball_cover/{path}")
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
new file mode 100644
index 0000000000..b4ecac06e6
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // int64_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Explicit instantiation of pass one for Mdims = 2 with Mdist_func = DistFunc.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
new file mode 100644
index 0000000000..31628d8b82
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // int64_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Explicit instantiation of pass one for Mdims = 2 with Mdist_func = EuclideanFunc.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
new file mode 100644
index 0000000000..80fda1bf9d
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // int64_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Explicit instantiation of pass one for Mdims = 2 with Mdist_func = HaversineFunc.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
new file mode 100644
index 0000000000..40aa89aa39
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // int64_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Explicit instantiation of pass one for Mdims = 3 with Mdist_func = DistFunc.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
new file mode 100644
index 0000000000..be159932a6
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // int64_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+// Explicit instantiation of pass one for Mdims = 3 with Mdist_func = EuclideanFunc.
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
new file mode 100644
index 0000000000..a9fe8f355f
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // std::int64_t, std::uint32_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(  // Mdims = 3, HaversineFunc
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
new file mode 100644
index 0000000000..b20df46a4f
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // std::int64_t, std::uint32_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(  // Mdims = 2, DistFunc
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
new file mode 100644
index 0000000000..d5042b0142
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // std::int64_t, std::uint32_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(  // Mdims = 2, EuclideanFunc
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
new file mode 100644
index 0000000000..01002d356e
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // std::int64_t, std::uint32_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(  // Mdims = 2, HaversineFunc
+  std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
new file mode 100644
index 0000000000..5746ab99fb
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // std::int64_t, std::uint32_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(  // Mdims = 3, DistFunc
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
new file mode 100644
index 0000000000..fad007a2d4
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // std::int64_t, std::uint32_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(  // Mdims = 3, EuclideanFunc
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
new file mode 100644
index 0000000000..93083da5c6
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by registers_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python registers_00_generate.py
+ *
+ */
+
+#include <cstdint>  // std::int64_t, std::uint32_t
+#include <raft/spatial/knn/detail/ball_cover/registers-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(                            \
+  Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
+  template void                                                                              \
+  raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
+    raft::device_resources const& handle,                                                    \
+    const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
+    const Mvalue_t* query,                                                                   \
+    const Mvalue_int n_query_rows,                                                           \
+    Mvalue_int k,                                                                            \
+    const Mvalue_idx* R_knn_inds,                                                            \
+    const Mvalue_t* R_knn_dists,                                                             \
+    Mdist_func<Mvalue_t, Mvalue_int>& dfunc,                                                 \
+    Mvalue_idx* inds,                                                                        \
+    Mvalue_t* dists,                                                                         \
+    float weight,                                                                            \
+    Mvalue_int* dists_counter)
+
+instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(  // Mdims = 3, HaversineFunc
+  std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc);
+#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu b/cpp/src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu
new file mode 100644
index 0000000000..67b08655e6
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstddef>                           // size_t
+#include <cstdint>                           // int32_t
+#include <raft/distance/distance_types.hpp>  // DistanceType
+#include <raft/spatial/knn/detail/fused_l2_knn-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_fusedL2Knn(Mvalue_idx, Mvalue_t, MusePrevTopKs)  \
+  template void raft::spatial::knn::detail::fusedL2Knn<Mvalue_idx, Mvalue_t, MusePrevTopKs>( \
+    size_t D,                                                                                \
+    Mvalue_idx * out_inds,                                                                   \
+    Mvalue_t * out_dists,                                                                    \
+    const Mvalue_t* index,                                                                   \
+    const Mvalue_t* query,                                                                   \
+    size_t n_index_rows,                                                                     \
+    size_t n_query_rows,                                                                     \
+    int k,                                                                                   \
+    bool rowMajorIndex,                                                                      \
+    bool rowMajorQuery,                                                                      \
+    cudaStream_t stream,                                                                     \
+    raft::distance::DistanceType metric)
+
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int32_t, float, true);   // MusePrevTopKs = true
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int32_t, float, false);  // MusePrevTopKs = false
+
+#undef instantiate_raft_spatial_knn_detail_fusedL2Knn  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu b/cpp/src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
new file mode 100644
index 0000000000..3c0d13710e
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstddef>                           // size_t
+#include <cstdint>                           // int64_t
+#include <raft/distance/distance_types.hpp>  // DistanceType
+#include <raft/spatial/knn/detail/fused_l2_knn-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_fusedL2Knn(Mvalue_idx, Mvalue_t, MusePrevTopKs)  \
+  template void raft::spatial::knn::detail::fusedL2Knn<Mvalue_idx, Mvalue_t, MusePrevTopKs>( \
+    size_t D,                                                                                \
+    Mvalue_idx * out_inds,                                                                   \
+    Mvalue_t * out_dists,                                                                    \
+    const Mvalue_t* index,                                                                   \
+    const Mvalue_t* query,                                                                   \
+    size_t n_index_rows,                                                                     \
+    size_t n_query_rows,                                                                     \
+    int k,                                                                                   \
+    bool rowMajorIndex,                                                                      \
+    bool rowMajorQuery,                                                                      \
+    cudaStream_t stream,                                                                     \
+    raft::distance::DistanceType metric)
+
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int64_t, float, true);   // MusePrevTopKs = true
+instantiate_raft_spatial_knn_detail_fusedL2Knn(int64_t, float, false);  // MusePrevTopKs = false
+
+#undef instantiate_raft_spatial_knn_detail_fusedL2Knn  // macro is local to this file
diff --git a/cpp/src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu b/cpp/src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
new file mode 100644
index 0000000000..e799c5181f
--- /dev/null
+++ b/cpp/src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstddef>                           // size_t
+#include <cstdint>                           // uint32_t
+#include <raft/distance/distance_types.hpp>  // DistanceType
+#include <raft/spatial/knn/detail/fused_l2_knn-inl.cuh>
+
+#define instantiate_raft_spatial_knn_detail_fusedL2Knn(Mvalue_idx, Mvalue_t, MusePrevTopKs)  \
+  template void raft::spatial::knn::detail::fusedL2Knn<Mvalue_idx, Mvalue_t, MusePrevTopKs>( \
+    size_t D,                                                                                \
+    Mvalue_idx * out_inds,                                                                   \
+    Mvalue_t * out_dists,                                                                    \
+    const Mvalue_t* index,                                                                   \
+    const Mvalue_t* query,                                                                   \
+    size_t n_index_rows,                                                                     \
+    size_t n_query_rows,                                                                     \
+    int k,                                                                                   \
+    bool rowMajorIndex,                                                                      \
+    bool rowMajorQuery,                                                                      \
+    cudaStream_t stream,                                                                     \
+    raft::distance::DistanceType metric)
+
+// These are used by brute_force_knn:
+instantiate_raft_spatial_knn_detail_fusedL2Knn(uint32_t, float, true);   // MusePrevTopKs = true
+instantiate_raft_spatial_knn_detail_fusedL2Knn(uint32_t, float, false);  // MusePrevTopKs = false
+
+#undef instantiate_raft_spatial_knn_detail_fusedL2Knn  // macro is local to this file
diff --git a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu b/cpp/src/util/memory_pool.cpp
similarity index 72%
rename from cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu
rename to cpp/src/util/memory_pool.cpp
index 7ea4b60e09..837e870043 100644
--- a/cpp/src/distance/specializations/detail/kernels/rbf_kernel_double.cu
+++ b/cpp/src/util/memory_pool.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,4 @@
  * limitations under the License.
  */
 
-#include <raft/distance/detail/kernels/kernel_matrices.cuh>
-#include <raft/distance/specializations.cuh>
-
-template class raft::distance::kernels::detail::RBFKernel<double>;
\ No newline at end of file
+#include <raft/util/memory_pool-inl.hpp>
diff --git a/cpp/template/src/test_distance.cu b/cpp/template/src/test_distance.cu
index b86dde70e5..e165cd8f14 100644
--- a/cpp/template/src/test_distance.cu
+++ b/cpp/template/src/test_distance.cu
@@ -20,10 +20,6 @@
 #include <raft/distance/distance.cuh>
 #include <raft/random/make_blobs.cuh>
 
-#ifdef RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
-
 int main()
 {
   raft::device_resources handle;
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index c8d4f91ec0..7f45a6dd22 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -17,7 +17,7 @@
 
 function(ConfigureTest)
 
-  set(options OPTIONAL LIB)
+  set(options OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY)
   set(oneValueArgs NAME)
   set(multiValueArgs PATH TARGETS CONFIGURATIONS)
 
@@ -59,6 +59,10 @@ function(ConfigureTest)
                          "$<$<COMPILE_LANGUAGE:CUDA>:${RAFT_CUDA_FLAGS}>"
   )
 
+  if(ConfigureTest_EXPLICIT_INSTANTIATE_ONLY)
+    target_compile_definitions(${TEST_NAME} PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY")
+  endif()
+
   target_include_directories(${TEST_NAME} PUBLIC "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/test>")
 
   install(
@@ -88,6 +92,7 @@ if(BUILD_TESTS)
     test/cluster/kmeans_find_k.cu
     OPTIONAL
     LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
@@ -112,6 +117,9 @@ if(BUILD_TESTS)
     test/core/span.cu
     test/core/temporary_device_buffer.cu
     test/test.cpp
+    OPTIONAL
+    LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
@@ -119,6 +127,7 @@ if(BUILD_TESTS)
     DISTANCE_TEST
     PATH
     test/distance/dist_adj.cu
+    test/distance/dist_adj_distance_instance.cu
     test/distance/dist_canberra.cu
     test/distance/dist_correlation.cu
     test/distance/dist_cos.cu
@@ -140,8 +149,46 @@ if(BUILD_TESTS)
     test/distance/gram.cu
     OPTIONAL
     LIB
+    EXPLICIT_INSTANTIATE_ONLY
+  )
+
+  list(
+    APPEND
+    EXT_HEADER_TEST_SOURCES
+    test/ext_headers/raft_neighbors_brute_force.cu
+    test/ext_headers/raft_distance_distance.cu
+    test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu
+    test/ext_headers/raft_matrix_detail_select_k.cu
+    test/ext_headers/raft_neighbors_ball_cover.cu
+    test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu
+    test/ext_headers/raft_distance_fused_l2_nn.cu
+    test/ext_headers/raft_neighbors_ivf_pq.cu
+    test/ext_headers/raft_util_memory_pool.cpp
+    test/ext_headers/raft_neighbors_ivf_flat.cu
+    test/ext_headers/raft_core_logger.cpp
+    test/ext_headers/raft_neighbors_refine.cu
+    test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu
+    test/ext_headers/raft_neighbors_detail_selection_faiss.cu
+    test/ext_headers/raft_linalg_detail_coalesced_reduction.cu
+    test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu
+    test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu
+    test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu
   )
 
+  # Test that the split headers compile in isolation with:
+  #
+  # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined
+  # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined
+  # * EXT_HEADERS_TEST_IMPLICIT:          no macros defined.
+  ConfigureTest(
+    NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} OPTIONAL LIB
+    EXPLICIT_INSTANTIATE_ONLY
+  )
+  ConfigureTest(
+    NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} OPTIONAL LIB
+  )
+  ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES})
+
   ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu)
 
   ConfigureTest(
@@ -201,6 +248,7 @@ if(BUILD_TESTS)
     test/sparse/spectral_matrix.cu
     OPTIONAL
     LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
@@ -220,7 +268,7 @@ if(BUILD_TESTS)
 
   ConfigureTest(
     NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu
-    test/lap/lap.cu test/sparse/mst.cu OPTIONAL LIB
+    test/lap/lap.cu test/sparse/mst.cu OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
@@ -245,19 +293,20 @@ if(BUILD_TESTS)
   )
 
   ConfigureTest(
-    NAME 
-    SPARSE_DIST_TEST 
-    PATH 
-    test/sparse/dist_coo_spmv.cu 
-    test/sparse/distance.cu 
-    test/sparse/gram.cu 
-    OPTIONAL 
-    LIB
+    NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu
+    test/sparse/gram.cu OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
-    NAME SPARSE_NEIGHBORS_TEST PATH test/sparse/neighbors/connect_components.cu
-    test/sparse/neighbors/brute_force.cu test/sparse/neighbors/knn_graph.cu OPTIONAL LIB
+    NAME
+    SPARSE_NEIGHBORS_TEST
+    PATH
+    test/sparse/neighbors/connect_components.cu
+    test/sparse/neighbors/brute_force.cu
+    test/sparse/neighbors/knn_graph.cu
+    OPTIONAL
+    LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
@@ -283,6 +332,7 @@ if(BUILD_TESTS)
     test/neighbors/selection.cu
     OPTIONAL
     LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
@@ -316,6 +366,7 @@ if(BUILD_TESTS)
     test/stats/v_measure.cu
     OPTIONAL
     LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureTest(
diff --git a/cpp/test/cluster/cluster_solvers.cu b/cpp/test/cluster/cluster_solvers.cu
index f26c598a2b..60e5f62dc0 100644
--- a/cpp/test/cluster/cluster_solvers.cu
+++ b/cpp/test/cluster/cluster_solvers.cu
@@ -19,10 +19,6 @@
 #include <memory>
 #include <raft/core/device_resources.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/spectral/specializations.cuh>
-#endif
-
 #include <raft/spectral/cluster_solvers.cuh>
 #include <raft/spectral/modularity_maximization.cuh>
 
diff --git a/cpp/test/cluster/kmeans.cu b/cpp/test/cluster/kmeans.cu
index cfec84256b..20110eed11 100644
--- a/cpp/test/cluster/kmeans.cu
+++ b/cpp/test/cluster/kmeans.cu
@@ -29,10 +29,6 @@
 #include <rmm/device_uvector.hpp>
 #include <thrust/fill.h>
 
-#if defined RAFT_COMPILED
-#include <raft/cluster/specializations.cuh>
-#endif
-
 namespace raft {
 
 template <typename T>
diff --git a/cpp/test/cluster/kmeans_balanced.cu b/cpp/test/cluster/kmeans_balanced.cu
index 220eba4186..a34f2f3b59 100644
--- a/cpp/test/cluster/kmeans_balanced.cu
+++ b/cpp/test/cluster/kmeans_balanced.cu
@@ -30,10 +30,6 @@
 #include <rmm/device_uvector.hpp>
 #include <thrust/fill.h>
 
-#if defined RAFT_COMPILED
-#include <raft/cluster/specializations.cuh>
-#endif
-
 /* This test takes advantage of the fact that make_blobs generates balanced clusters.
  * It doesn't currently test whether the algorithm can make balanced clusters with an imbalanced
  * dataset.
diff --git a/cpp/test/cluster/kmeans_find_k.cu b/cpp/test/cluster/kmeans_find_k.cu
index a865651f56..bb41d4fafc 100644
--- a/cpp/test/cluster/kmeans_find_k.cu
+++ b/cpp/test/cluster/kmeans_find_k.cu
@@ -25,10 +25,6 @@
 #include <raft/random/make_blobs.cuh>
 #include <raft/util/cuda_utils.cuh>
 
-#if defined RAFT_COMPILED
-#include <raft/cluster/specializations.cuh>
-#endif
-
 namespace raft {
 
 template <typename T>
diff --git a/cpp/test/cluster/linkage.cu b/cpp/test/cluster/linkage.cu
index 4946d52f26..b2b177dde6 100644
--- a/cpp/test/cluster/linkage.cu
+++ b/cpp/test/cluster/linkage.cu
@@ -14,16 +14,21 @@
  * limitations under the License.
  */
 
+// XXX: RAFT_EXPLICIT_INSTANTIATE_ONLY is undefined below so that this test can
+// implicitly instantiate fused_l2_nn for:
+// raft::linkage::FixConnectivitiesRedOp<value_idx, value_t> red_op(colors.data(), params.n_row);
+// raft::linkage::connect_components<value_idx, value_t>(
+//   handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op);
+// TODO: consider adding this instantiation to libraft.so or creating an instance
+// in a separate translation unit for this test.
+#undef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
 #include "../test_utils.cuh"
 
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/transpose.cuh>
 #include <raft/sparse/coo.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include <raft/core/device_mdspan.hpp>
 #include <raft/sparse/hierarchy/single_linkage.cuh>
 #include <raft/util/cudart_utils.hpp>
diff --git a/cpp/test/core/handle.cpp b/cpp/test/core/handle.cpp
index 9f416d3ae8..fddfd58bb8 100644
--- a/cpp/test/core/handle.cpp
+++ b/cpp/test/core/handle.cpp
@@ -22,6 +22,7 @@
 #include <raft/core/comms.hpp>
 #include <raft/core/handle.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
 #include <unordered_map>
 
 namespace raft {
diff --git a/cpp/test/distance/dist_adj.cu b/cpp/test/distance/dist_adj.cu
index ce802e5138..bb63cc9be3 100644
--- a/cpp/test/distance/dist_adj.cu
+++ b/cpp/test/distance/dist_adj.cu
@@ -22,6 +22,8 @@
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include "dist_adj.cuh"
+
 namespace raft {
 namespace distance {
 
@@ -74,18 +76,6 @@ struct DistanceAdjInputs {
   unsigned long long int seed;
 };
 
-template <typename AccT, typename DataT, typename OutT, typename Index>
-struct threshold_final_op {
-  DataT threshold_val;
-
-  __device__ __host__ threshold_final_op() noexcept : threshold_val(0.0) {}
-  __device__ __host__ threshold_final_op(DataT val) noexcept : threshold_val(val) {}
-  __device__ __host__ OutT operator()(AccT d_val, Index g_idx) const noexcept
-  {
-    return d_val <= threshold_val;
-  }
-};
-
 template <typename DataType>
 ::std::ostream& operator<<(::std::ostream& os, const DistanceAdjInputs<DataType>& dims)
 {
@@ -140,7 +130,7 @@ class DistanceAdjTest : public ::testing::TestWithParam<DistanceAdjInputs<DataTy
                                                   n,
                                                   k,
                                                   workspace.data(),
-                                                  workspace.size(),
+                                                  worksize,
                                                   threshold_op,
                                                   isRowMajor);
     handle.sync_stream(stream);
diff --git a/cpp/test/distance/dist_adj.cuh b/cpp/test/distance/dist_adj.cuh
new file mode 100644
index 0000000000..ee4554ff29
--- /dev/null
+++ b/cpp/test/distance/dist_adj.cuh
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "dist_adj_threshold.cuh"
+#include <raft/distance/distance.cuh>
+
+// Declare, without instantiating, the raft::distance::distance specializations
+// that use the threshold final op. The matching explicit instantiations are in
+// dist_adj_distance_instance.cu, so translation units including this header
+// do not need to instantiate these templates themselves.
+#define instantiate_raft_distance_distance(DT, DataT, AccT, OutT, FinalLambda, IdxT)       \
+  extern template void raft::distance::distance<DT, DataT, AccT, OutT, FinalLambda, IdxT>( \
+    raft::resources const& handle,                                                         \
+    const DataT* x,                                                                        \
+    const DataT* y,                                                                        \
+    OutT* dist,                                                                            \
+    IdxT m,                                                                                \
+    IdxT n,                                                                                \
+    IdxT k,                                                                                \
+    void* workspace,                                                                       \
+    size_t worksize,                                                                       \
+    FinalLambda fin_op,                                                                    \
+    bool isRowMajor,                                                                       \
+    DataT metric_arg)
+
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Expanded,
+                                   float,
+                                   float,
+                                   uint8_t,
+                                   raft::distance::threshold_float,
+                                   int);
+
+instantiate_raft_distance_distance(raft::distance::DistanceType::L2Expanded,
+                                   double,
+                                   double,
+                                   uint8_t,
+                                   raft::distance::threshold_double,
+                                   int);
+
+#undef instantiate_raft_distance_distance
+
+// Likewise for raft::distance::getWorkspaceSize: declared here, explicitly
+// instantiated in dist_adj_distance_instance.cu.
+#define instantiate_raft_distance_getWorkspaceSize(DistT, DataT, AccT, OutT, IdxT)         \
+  extern template size_t raft::distance::getWorkspaceSize<DistT, DataT, AccT, OutT, IdxT>( \
+    const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k)
+
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, float, float, uint8_t, int);
+instantiate_raft_distance_getWorkspaceSize(
+  raft::distance::DistanceType::L2Expanded, double, double, uint8_t, int);
+
+#undef instantiate_raft_distance_getWorkspaceSize
diff --git a/cpp/test/distance/dist_adj_distance_instance.cu b/cpp/test/distance/dist_adj_distance_instance.cu
new file mode 100644
index 0000000000..d4685d8095
--- /dev/null
+++ b/cpp/test/distance/dist_adj_distance_instance.cu
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// RAFT_EXPLICIT_INSTANTIATE_ONLY must not be defined before the include below;
+// presumably so the definitions in distance-inl.cuh can be instantiated here.
+#undef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+#include "dist_adj_threshold.cuh"
+#include <cstdint>
+#include <raft/distance/distance-inl.cuh>
+
+// Explicit instantiations of raft::distance::distance with the threshold final
+// op (L2Expanded; float and double data; uint8_t output; int indices).
+template void raft::distance::distance<raft::distance::DistanceType::L2Expanded,
+                                       float,
+                                       float,
+                                       uint8_t,
+                                       raft::distance::threshold_float,
+                                       int>(raft::resources const& handle,
+                                            const float* x,
+                                            const float* y,
+                                            uint8_t* dist,
+                                            int m,
+                                            int n,
+                                            int k,
+                                            void* workspace,
+                                            size_t worksize,
+                                            raft::distance::threshold_float fin_op,
+                                            bool isRowMajor,
+                                            float metric_arg);
+
+template void raft::distance::distance<raft::distance::DistanceType::L2Expanded,
+                                       double,
+                                       double,
+                                       uint8_t,
+                                       raft::distance::threshold_double,
+                                       int>(raft::resources const& handle,
+                                            const double* x,
+                                            const double* y,
+                                            uint8_t* dist,
+                                            int m,
+                                            int n,
+                                            int k,
+                                            void* workspace,
+                                            size_t worksize,
+                                            raft::distance::threshold_double fin_op,
+                                            bool isRowMajor,
+                                            double metric_arg);
+
+// Explicit instantiations of getWorkspaceSize for the same type combinations.
+template size_t raft::distance::
+  getWorkspaceSize<raft::distance::DistanceType::L2Expanded, float, float, uint8_t, int>(
+    const float* x, const float* y, int m, int n, int k);
+
+template size_t raft::distance::
+  getWorkspaceSize<raft::distance::DistanceType::L2Expanded, double, double, uint8_t, int>(
+    const double* x, const double* y, int m, int n, int k);
diff --git a/cpp/test/distance/dist_adj_threshold.cuh b/cpp/test/distance/dist_adj_threshold.cuh
new file mode 100644
index 0000000000..78663b3cd1
--- /dev/null
+++ b/cpp/test/distance/dist_adj_threshold.cuh
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>  // uint8_t
+
+namespace raft::distance {
+
+/**
+ * @brief Final operation that compares a distance against a threshold.
+ *
+ * operator() returns `d_val <= threshold_val` converted to OutT; the index
+ * argument is accepted but not used.
+ */
+template <typename AccT, typename DataT, typename OutT, typename Index>
+struct threshold_final_op {
+  DataT threshold_val;
+
+  __device__ __host__ threshold_final_op() noexcept : threshold_val(0.0) {}
+  __device__ __host__ threshold_final_op(DataT val) noexcept : threshold_val(val) {}
+  __device__ __host__ OutT operator()(AccT d_val, Index g_idx) const noexcept
+  {
+    return d_val <= threshold_val;
+  }
+};
+
+// Concrete final-op types used by dist_adj.cuh and dist_adj_distance_instance.cu.
+using threshold_float  = threshold_final_op<float, float, uint8_t, int>;
+using threshold_double = threshold_final_op<double, double, uint8_t, int>;
+
+}  // namespace raft::distance
diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh
index b8c35461b1..60951daeb7 100644
--- a/cpp/test/distance/distance_base.cuh
+++ b/cpp/test/distance/distance_base.cuh
@@ -21,20 +21,11 @@
 #include <raft/core/device_mdspan.hpp>       // make_device_matrix_view
 #include <raft/core/device_resources.hpp>    // raft::device_resources
 #include <raft/core/operators.hpp>           // raft::sqrt
+#include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
 #include <raft/random/rng.cuh>
 #include <rmm/device_uvector.hpp>            // rmm::device_uvector
 
-// When the distance library is precompiled, include only the raft_runtime
-// headers. This way, a small change in one of the kernel internals does not
-// trigger a rebuild of the test files (it of course still triggers a rebuild of
-// the raft specializations)
-#if defined RAFT_COMPILED
-#include <raft_runtime/distance/pairwise_distance.hpp>
-#else
-#include <raft/distance/distance.cuh>
-#endif
-
 namespace raft {
 namespace distance {
 
@@ -449,23 +440,12 @@ void distanceLauncher(raft::device_resources const& handle,
                       DataType threshold,
                       DataType metric_arg = 2.0f)
 {
-#if defined RAFT_COMPILED
-  // TODO: Implement and use mdspan-based
-  // raft::runtime::distance::pairwise_distance here.
-  //
-  // Context:
-  // https://github.com/rapidsai/raft/issues/1338
-  bool row_major = layout_to_row_major<layout>();
-  raft::runtime::distance::pairwise_distance(
-    handle, x, y, dist, m, n, k, distanceType, row_major, metric_arg);
-#else
   auto x_v    = make_device_matrix_view<DataType, int, layout>(x, m, k);
   auto y_v    = make_device_matrix_view<DataType, int, layout>(y, n, k);
   auto dist_v = make_device_matrix_view<DataType, int, layout>(dist, m, n);
 
   raft::distance::distance<distanceType, DataType, DataType, DataType, layout>(
     handle, x_v, y_v, dist_v, metric_arg);
-#endif
 }
 
 template <raft::distance::DistanceType distanceType, typename DataType>
@@ -573,13 +553,8 @@ class BigMatrixDistanceTest : public ::testing::Test {
                            float metric_arg);
     constexpr bool row_major   = true;
     constexpr float metric_arg = 0.0f;
-#if defined RAFT_COMPILED
-    raft::runtime::distance::pairwise_distance(
-      handle, x.data(), x.data(), dist.data(), m, n, k, distanceType, row_major, metric_arg);
-#else
     raft::distance::distance<distanceType, float, float, float>(
       handle, x.data(), x.data(), dist.data(), m, n, k, row_major, metric_arg);
-#endif
     RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
   }
 
diff --git a/cpp/test/distance/fused_l2_nn.cu b/cpp/test/distance/fused_l2_nn.cu
index 383ad39319..c4ccd55f69 100644
--- a/cpp/test/distance/fused_l2_nn.cu
+++ b/cpp/test/distance/fused_l2_nn.cu
@@ -24,10 +24,6 @@
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
-
 namespace raft {
 namespace distance {
 
diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu
index 47da201465..797e31c85d 100644
--- a/cpp/test/distance/gram.cu
+++ b/cpp/test/distance/gram.cu
@@ -14,10 +14,6 @@
  * limitations under the License.
  */
 
-#if defined RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
-
 #include "../test_utils.cuh"
 #include "gram_base.cuh"
 #include <gtest/gtest.h>
diff --git a/cpp/test/distance/masked_nn.cu b/cpp/test/distance/masked_nn.cu
index d01911206b..66d5a77dbf 100644
--- a/cpp/test/distance/masked_nn.cu
+++ b/cpp/test/distance/masked_nn.cu
@@ -28,10 +28,6 @@
 #include <raft/util/cudart_utils.hpp>
 #include <raft/util/itertools.hpp>
 
-#ifdef RAFT_COMPILED
-#include <raft/distance/specializations.cuh>
-#endif
-
 namespace raft::distance::masked_nn {
 
 // The adjacency pattern determines what distances get computed.
diff --git a/cpp/test/ext_headers/00_generate.py b/cpp/test/ext_headers/00_generate.py
new file mode 100644
index 0000000000..15f90e1cc5
--- /dev/null
+++ b/cpp/test/ext_headers/00_generate.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Generates, for every "-ext" header listed in `ext_headers`, a source file
+# that includes only the corresponding header (the same path without "-ext"),
+# and prints the generated file paths for use in CMakeLists.txt.
+
+import os
+
+copyright_notice = """
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+"""
+
+ext_headers = [
+    "raft/neighbors/brute_force-ext.cuh",
+    "raft/distance/distance-ext.cuh",
+    "raft/distance/detail/pairwise_matrix/dispatch-ext.cuh",
+    "raft/matrix/detail/select_k-ext.cuh",
+    "raft/neighbors/ball_cover-ext.cuh",
+    "raft/spatial/knn/detail/fused_l2_knn-ext.cuh",
+    "raft/distance/fused_l2_nn-ext.cuh",
+    "raft/neighbors/ivf_pq-ext.cuh",
+    "raft/util/memory_pool-ext.hpp",
+    "raft/neighbors/ivf_flat-ext.cuh",
+    "raft/core/logger-ext.hpp",
+    "raft/neighbors/refine-ext.cuh",
+    "raft/neighbors/detail/ivf_flat_search-ext.cuh",
+    "raft/neighbors/detail/selection_faiss-ext.cuh",
+    "raft/linalg/detail/coalesced_reduction-ext.cuh",
+    "raft/spatial/knn/detail/ball_cover/registers-ext.cuh",
+    "raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh",
+    "raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh",
+]
+
+# Write the generated sources next to this script, so the result does not
+# depend on the directory the script happens to be invoked from.
+output_dir = os.path.dirname(os.path.abspath(__file__))
+
+for ext_header in ext_headers:
+    # The generated source includes the header named without "-ext".
+    header = ext_header.replace("-ext", "")
+
+    # Flatten the include path into a single file name and map the header
+    # extension to the matching source extension (.cuh -> .cu, .hpp -> .cpp).
+    path = (
+        header
+        .replace("/", "_")
+        .replace(".cuh", ".cu")
+        .replace(".hpp", ".cpp")
+    )
+
+    with open(os.path.join(output_dir, path), "w") as f:
+        f.write(copyright_notice)
+        f.write(f"#include <{header}>\n")
+
+    # Print the entry to paste into the test source list in CMakeLists.txt
+    print(f"test/ext_headers/{path}")
diff --git a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu b/cpp/test/ext_headers/raft_core_logger.cpp
similarity index 72%
rename from cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu
rename to cpp/test/ext_headers/raft_core_logger.cpp
index f7825e577a..18ba9ef48d 100644
--- a/cpp/src/distance/specializations/detail/kernels/tanh_kernel_float.cu
+++ b/cpp/test/ext_headers/raft_core_logger.cpp
@@ -1,5 +1,6 @@
+
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +15,13 @@
  * limitations under the License.
  */
 
-#include <raft/distance/detail/kernels/kernel_matrices.cuh>
-#include <raft/distance/specializations.cuh>
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
 
-template class raft::distance::kernels::detail::TanhKernel<float>;
\ No newline at end of file
+#include <raft/core/logger.hpp>
diff --git a/cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu b/cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu
new file mode 100644
index 0000000000..02e4c8e331
--- /dev/null
+++ b/cpp/test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/distance/detail/pairwise_matrix/dispatch.cuh>
diff --git a/cpp/test/ext_headers/raft_distance_distance.cu b/cpp/test/ext_headers/raft_distance_distance.cu
new file mode 100644
index 0000000000..458d6385ed
--- /dev/null
+++ b/cpp/test/ext_headers/raft_distance_distance.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/distance/distance.cuh>
diff --git a/cpp/test/ext_headers/raft_distance_fused_l2_nn.cu b/cpp/test/ext_headers/raft_distance_fused_l2_nn.cu
new file mode 100644
index 0000000000..23ab58a67b
--- /dev/null
+++ b/cpp/test/ext_headers/raft_distance_fused_l2_nn.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/distance/fused_l2_nn.cuh>
diff --git a/cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu b/cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu
new file mode 100644
index 0000000000..7f94824287
--- /dev/null
+++ b/cpp/test/ext_headers/raft_linalg_detail_coalesced_reduction.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/linalg/detail/coalesced_reduction.cuh>
diff --git a/cpp/test/ext_headers/raft_matrix_detail_select_k.cu b/cpp/test/ext_headers/raft_matrix_detail_select_k.cu
new file mode 100644
index 0000000000..adb10f5bbb
--- /dev/null
+++ b/cpp/test/ext_headers/raft_matrix_detail_select_k.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/matrix/detail/select_k.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_ball_cover.cu b/cpp/test/ext_headers/raft_neighbors_ball_cover.cu
new file mode 100644
index 0000000000..8aaabe1872
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_ball_cover.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ball_cover.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_brute_force.cu b/cpp/test/ext_headers/raft_neighbors_brute_force.cu
new file mode 100644
index 0000000000..2c37799ae6
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_brute_force.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/brute_force.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu b/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu
new file mode 100644
index 0000000000..5a3a0b3f76
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_flat_interleaved_scan.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu b/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu
new file mode 100644
index 0000000000..a6274c1c80
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_flat_search.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu b/cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu
new file mode 100644
index 0000000000..fd5ad62204
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/ivf_pq_compute_similarity.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_detail_selection_faiss.cu b/cpp/test/ext_headers/raft_neighbors_detail_selection_faiss.cu
new file mode 100644
index 0000000000..f8bd21e86f
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_detail_selection_faiss.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/detail/selection_faiss.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_ivf_flat.cu b/cpp/test/ext_headers/raft_neighbors_ivf_flat.cu
new file mode 100644
index 0000000000..ab38e4c02c
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_ivf_flat.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_flat.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_ivf_pq.cu b/cpp/test/ext_headers/raft_neighbors_ivf_pq.cu
new file mode 100644
index 0000000000..43a66bde18
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_ivf_pq.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/ivf_pq.cuh>
diff --git a/cpp/test/ext_headers/raft_neighbors_refine.cu b/cpp/test/ext_headers/raft_neighbors_refine.cu
new file mode 100644
index 0000000000..6152f83aab
--- /dev/null
+++ b/cpp/test/ext_headers/raft_neighbors_refine.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/neighbors/refine.cuh>
diff --git a/cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu b/cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu
new file mode 100644
index 0000000000..39320a40c0
--- /dev/null
+++ b/cpp/test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/spatial/knn/detail/ball_cover/registers.cuh>
diff --git a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu b/cpp/test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu
similarity index 70%
rename from cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu
rename to cpp/test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu
index 6609de69ac..f884d1b062 100644
--- a/cpp/src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu
+++ b/cpp/test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu
@@ -1,5 +1,6 @@
+
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +15,13 @@
  * limitations under the License.
  */
 
-#include <raft/distance/detail/kernels/kernel_matrices.cuh>
-#include <raft/distance/specializations.cuh>
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
 
-template class raft::distance::kernels::detail::PolynomialKernel<float, int>;
\ No newline at end of file
+#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
diff --git a/cpp/test/ext_headers/raft_util_memory_pool.cpp b/cpp/test/ext_headers/raft_util_memory_pool.cpp
new file mode 100644
index 0000000000..11a024b958
--- /dev/null
+++ b/cpp/test/ext_headers/raft_util_memory_pool.cpp
@@ -0,0 +1,27 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by 00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python 00_generate.py
+ *
+ */
+
+#include <raft/util/memory_pool.hpp>
diff --git a/cpp/test/linalg/eigen_solvers.cu b/cpp/test/linalg/eigen_solvers.cu
index 1f29d7e275..ca34b0c3a4 100644
--- a/cpp/test/linalg/eigen_solvers.cu
+++ b/cpp/test/linalg/eigen_solvers.cu
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include <raft/common/nvtx.hpp>
 #include <raft/core/device_resources.hpp>
+#include <raft/core/nvtx.hpp>
 #include <raft/spectral/eigen_solvers.cuh>
 #include <raft/spectral/partition.cuh>
 
@@ -24,6 +24,7 @@
 #include <cstddef>
 #include <iostream>
 #include <memory>
+#include <type_traits>
 
 namespace raft {
 namespace spectral {
diff --git a/cpp/test/matrix/select_k.cu b/cpp/test/matrix/select_k.cu
index 2a40d70abc..7a8a5b7aa8 100644
--- a/cpp/test/matrix/select_k.cu
+++ b/cpp/test/matrix/select_k.cu
@@ -18,10 +18,6 @@
 
 #include <raft_internal/matrix/select_k.cuh>
 
-#ifdef RAFT_COMPILED
-#include <raft/matrix/specializations.cuh>
-#endif
-
 #include <raft/core/device_resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/sparse/detail/utils.h>
@@ -232,9 +228,10 @@ struct SelectK  // NOLINT
     auto& in_dists   = ref.get_in_dists();
     auto compare_ids = [&in_ids, &in_dists](const IdxT& i, const IdxT& j) {
       if (i == j) return true;
-      auto ix_i = uint64_t(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin());
-      auto ix_j = uint64_t(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin());
-      if (ix_i >= in_ids.size() || ix_j >= in_ids.size()) return false;
+      auto ix_i = static_cast<int64_t>(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin());
+      auto ix_j = static_cast<int64_t>(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin());
+      if (static_cast<size_t>(ix_i) >= in_ids.size() || static_cast<size_t>(ix_j) >= in_ids.size())
+        return false;
       auto dist_i = in_dists[ix_i];
       auto dist_j = in_dists[ix_j];
       if (dist_i == dist_j) return true;
@@ -434,7 +431,7 @@ INSTANTIATE_TEST_CASE_P(                          // NOLINT
                                    select::Algo::kWarpDistributedShm)));
 
 using ReferencedRandomDoubleSizeT =
-  SelectK<double, uint64_t, with_ref<select::Algo::kPublicApi>::params_random>;
+  SelectK<double, int64_t, with_ref<select::Algo::kPublicApi>::params_random>;
 TEST_P(ReferencedRandomDoubleSizeT, Run) { run(); }  // NOLINT
 INSTANTIATE_TEST_CASE_P(                             // NOLINT
   SelectK,
@@ -461,7 +458,7 @@ INSTANTIATE_TEST_CASE_P(                                 // NOLINT
                                    select::Algo::kRadix11bitsExtraPass)));
 
 using ReferencedRandomFloatSizeT =
-  SelectK<float, uint64_t, with_ref<select::Algo::kRadix8bits>::params_random>;
+  SelectK<float, int64_t, with_ref<select::Algo::kRadix8bits>::params_random>;
 TEST_P(ReferencedRandomFloatSizeT, LargeK) { run(); }  // NOLINT
 INSTANTIATE_TEST_CASE_P(SelectK,                       // NOLINT
                         ReferencedRandomFloatSizeT,
diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
index 71a83e2cca..1497a515d2 100644
--- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
@@ -18,10 +18,6 @@
 
 #include "../ann_cagra.cuh"
 
-// #if defined RAFT_DISTANCE_COMPILED
-// #include <raft/neighbors/specializations.cuh>
-// #endif
-
 namespace raft::neighbors::experimental::cagra {
 
 typedef AnnCagraTest<float, float, std::uint32_t> AnnCagraTestF;
diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh
index fe6f9163a0..4d90c3d7e4 100644
--- a/cpp/test/neighbors/ann_ivf_flat.cuh
+++ b/cpp/test/neighbors/ann_ivf_flat.cuh
@@ -36,10 +36,6 @@
 
 #include <thrust/sequence.h>
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include <cstddef>
 #include <iostream>
 #include <vector>
diff --git a/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu b/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu
index e430af89df..f0988ca988 100644
--- a/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu
+++ b/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu
@@ -18,10 +18,6 @@
 
 #include "../ann_ivf_flat.cuh"
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 namespace raft::neighbors::ivf_flat {
 
 typedef AnnIVFFlatTest<float, float, std::int64_t> AnnIVFFlatTestF;
diff --git a/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
index e4e7a207fb..2f542bd6ec 100644
--- a/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
+++ b/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
@@ -18,10 +18,6 @@
 
 #include "../ann_ivf_flat.cuh"
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 namespace raft::neighbors::ivf_flat {
 
 typedef AnnIVFFlatTest<float, int8_t, std::int64_t> AnnIVFFlatTestF_int8;
diff --git a/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
index ef7980401a..7659707089 100644
--- a/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
+++ b/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
@@ -18,10 +18,6 @@
 
 #include "../ann_ivf_flat.cuh"
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 namespace raft::neighbors::ivf_flat {
 
 typedef AnnIVFFlatTest<float, uint8_t, std::int64_t> AnnIVFFlatTestF_uint8;
diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh
index 07efcb099e..90c66ace06 100644
--- a/cpp/test/neighbors/ann_ivf_pq.cuh
+++ b/cpp/test/neighbors/ann_ivf_pq.cuh
@@ -27,12 +27,8 @@
 #include <raft/matrix/gather.cuh>
 #include <raft/neighbors/ivf_pq.cuh>
 #include <raft/neighbors/ivf_pq_helpers.cuh>
+#include <raft/neighbors/ivf_pq_serialize.cuh>
 #include <raft/random/rng.cuh>
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#else
-#pragma message("NN specializations are not enabled; expect very long building times.")
-#endif
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_buffer.hpp>
diff --git a/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu
index c14afe4d70..3d362a5261 100644
--- a/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu
+++ b/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu
@@ -14,6 +14,13 @@
  * limitations under the License.
  */
 
+// XXX: the uint32_t instance is not compiled in libraft.so. So we allow
+// instantiating the template here.
+//
+// TODO: consider removing this test or consider adding an instantiation to the
+// library.
+#undef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
 #include "../ann_ivf_pq.cuh"
 
 namespace raft::neighbors::ivf_pq {
diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh
index fc448f014f..438c56da21 100644
--- a/cpp/test/neighbors/ann_utils.cuh
+++ b/cpp/test/neighbors/ann_utils.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdarray.hpp>  // raft::make_device_matrix
 #include <raft/distance/distance_types.hpp>
 #include <raft/matrix/detail/select_k.cuh>
 #include <raft/matrix/matrix.cuh>
diff --git a/cpp/test/neighbors/ball_cover.cu b/cpp/test/neighbors/ball_cover.cu
index 46ef3a9150..19935154df 100644
--- a/cpp/test/neighbors/ball_cover.cu
+++ b/cpp/test/neighbors/ball_cover.cu
@@ -23,10 +23,6 @@
 #include <raft/random/make_blobs.cuh>
 #include <raft/util/cudart_utils.hpp>
 
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
diff --git a/cpp/test/neighbors/epsilon_neighborhood.cu b/cpp/test/neighbors/epsilon_neighborhood.cu
index 769cb7ec2d..c78a15dd2d 100644
--- a/cpp/test/neighbors/epsilon_neighborhood.cu
+++ b/cpp/test/neighbors/epsilon_neighborhood.cu
@@ -23,10 +23,6 @@
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
 
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 namespace raft {
 namespace spatial {
 namespace knn {
diff --git a/cpp/test/neighbors/fused_l2_knn.cu b/cpp/test/neighbors/fused_l2_knn.cu
index ab05b41cc9..9fbccf681d 100644
--- a/cpp/test/neighbors/fused_l2_knn.cu
+++ b/cpp/test/neighbors/fused_l2_knn.cu
@@ -23,10 +23,6 @@
 #include <raft/random/rng.cuh>
 #include <raft/spatial/knn/knn.cuh>
 
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include <raft/distance/distance.cuh>
 
 #include <rmm/device_buffer.hpp>
@@ -81,9 +77,9 @@ class FusedL2KNNTest : public ::testing::TestWithParam<FusedL2KNNInputs> {
     rmm::device_uvector<T> temp_distances(num_db_vecs * num_queries, stream_);
     distance::pairwise_distance(
       handle_,
-      raft::make_device_matrix_view<T, int64_t>(search_queries.data(), num_queries, dim),
-      raft::make_device_matrix_view<T, int64_t>(database.data(), num_db_vecs, dim),
-      raft::make_device_matrix_view<T, int64_t>(temp_distances.data(), num_queries, num_db_vecs),
+      raft::make_device_matrix_view<T, int32_t>(search_queries.data(), num_queries, dim),
+      raft::make_device_matrix_view<T, int32_t>(database.data(), num_db_vecs, dim),
+      raft::make_device_matrix_view<T, int32_t>(temp_distances.data(), num_queries, num_db_vecs),
       metric);
 
     spatial::knn::select_k<int64_t, T>(temp_distances.data(),
diff --git a/cpp/test/neighbors/knn.cu b/cpp/test/neighbors/knn.cu
index edac73b073..a03a761c7e 100644
--- a/cpp/test/neighbors/knn.cu
+++ b/cpp/test/neighbors/knn.cu
@@ -21,10 +21,6 @@
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/brute_force.cuh>
 
-#ifdef RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include <rmm/device_buffer.hpp>
 
 #include <gtest/gtest.h>
diff --git a/cpp/test/neighbors/refine.cu b/cpp/test/neighbors/refine.cu
index dd3491673e..d868ba06cf 100644
--- a/cpp/test/neighbors/refine.cu
+++ b/cpp/test/neighbors/refine.cu
@@ -31,10 +31,6 @@
 
 #include <gtest/gtest.h>
 
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
-
 #include <vector>
 
 namespace raft::neighbors {
diff --git a/cpp/test/neighbors/selection.cu b/cpp/test/neighbors/selection.cu
index 9f13de357c..a21ff9f99e 100644
--- a/cpp/test/neighbors/selection.cu
+++ b/cpp/test/neighbors/selection.cu
@@ -17,6 +17,8 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <numeric>
+#include <raft/neighbors/detail/selection_faiss.cuh>
+#include <raft/neighbors/detail/selection_faiss_helpers.cuh>  // kFaissMax
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -24,9 +26,6 @@
 
 #include <raft/sparse/detail/utils.h>
 #include <raft/spatial/knn/knn.cuh>
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
 
 namespace raft::spatial::selection {
 
diff --git a/cpp/test/neighbors/tiled_knn.cu b/cpp/test/neighbors/tiled_knn.cu
index ccc3a64edd..aa46fc29f1 100644
--- a/cpp/test/neighbors/tiled_knn.cu
+++ b/cpp/test/neighbors/tiled_knn.cu
@@ -20,14 +20,13 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/logger.hpp>
+#include <raft/distance/distance.cuh>  // raft::distance::pairwise_distance
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/transpose.cuh>
 #include <raft/matrix/init.cuh>
 #include <raft/neighbors/brute_force.cuh>
-
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
+#include <raft/neighbors/detail/knn_brute_force.cuh>  // raft::neighbors::detail::brute_force_knn_impl
+#include <raft/neighbors/detail/selection_faiss.cuh>  // raft::neighbors::detail::select_k
 
 #include <rmm/device_buffer.hpp>
 
diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu
index d200744329..e14cd9a180 100644
--- a/cpp/test/sparse/neighbors/connect_components.cu
+++ b/cpp/test/sparse/neighbors/connect_components.cu
@@ -14,6 +14,15 @@
  * limitations under the License.
  */
 
+// XXX: We allow the instantiation of fused_l2_nn here:
+// raft::linkage::FixConnectivitiesRedOp<value_idx, value_t> red_op(colors.data(), params.n_row);
+// raft::linkage::connect_components<value_idx, value_t>(
+//   handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op);
+//
+// TODO: consider adding this to libraft.so or creating an instance in a
+// separate translation unit for this test.
+#undef RAFT_EXPLICIT_INSTANTIATE_ONLY
+
 #include <gtest/gtest.h>
 
 #include <cub/cub.cuh>
diff --git a/cpp/test/sparse/neighbors/knn_graph.cu b/cpp/test/sparse/neighbors/knn_graph.cu
index 8873445c37..aadb00879b 100644
--- a/cpp/test/sparse/neighbors/knn_graph.cu
+++ b/cpp/test/sparse/neighbors/knn_graph.cu
@@ -22,9 +22,6 @@
 
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/neighbors/knn_graph.cuh>
-#if defined RAFT_COMPILED
-#include <raft/neighbors/specializations.cuh>
-#endif
 
 #include <iostream>
 
diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu
index 40b7e59d81..9ad89d59c0 100644
--- a/cpp/test/stats/silhouette_score.cu
+++ b/cpp/test/stats/silhouette_score.cu
@@ -20,10 +20,6 @@
 #include <raft/distance/distance_types.hpp>
 #include <raft/util/cudart_utils.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/stats/specializations.cuh>
-#endif
-
 #include <raft/stats/silhouette_score.cuh>
 #include <random>
 #include <rmm/device_uvector.hpp>
diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu
index 2fde6b29c1..15b27c7669 100644
--- a/cpp/test/stats/trustworthiness.cu
+++ b/cpp/test/stats/trustworthiness.cu
@@ -20,10 +20,6 @@
 #include <raft/distance/distance.cuh>
 #include <raft/util/cudart_utils.hpp>
 
-#if defined RAFT_COMPILED
-#include <raft/stats/specializations.cuh>
-#endif
-
 #include <raft/stats/trustworthiness_score.cuh>
 #include <vector>
 
diff --git a/docs/source/build.md b/docs/source/build.md
index 262c5703bc..bd2afe6638 100644
--- a/docs/source/build.md
+++ b/docs/source/build.md
@@ -4,7 +4,7 @@
 
 The easiest way to install RAFT is through conda and several packages are provided.
 - `libraft-headers` RAFT headers
-- `libraft` (optional) shared library containing pre-compiled template specializations and runtime API.
+- `libraft` (optional) shared library containing pre-compiled template instantiations and runtime API.
 - `pylibraft` (optional) Python wrappers around RAFT algorithms and primitives.
 - `raft-dask` (optional) enables deployment of multi-node multi-GPU algorithms that use RAFT `raft::comms` in Dask clusters.
 
@@ -276,15 +276,7 @@ If the RAFT headers have already been installed into your environment with cmake
 
 Use `find_package(raft COMPONENTS compiled distributed)` to enable the shared library and transitively pass dependencies through separate targets for each component. In this example, the `raft::compiled` and `raft::distributed` targets will be available for configuring linking paths in addition to `raft::raft`. These targets will also pass through any transitive dependencies (such as NCCL for the `distributed` component).
 
-The pre-compiled libraries contain template specializations for commonly used types, such as single- and double-precision floating-point. In order to use the symbols in the pre-compiled libraries, the compiler needs to be told not to instantiate templates that are already contained in the shared libraries. By convention, these header files are named `specializations.cuh` and located in the base directory for the packages that contain specializations.
-
-The following example tells the compiler to ignore the pre-compiled templates for the `raft::distance` API so any symbols already compiled into the `libraft` shared library will be used instead. RAFT's cmake creates a variable `RAFT_COMPILED` which can be used to ignore the pre-compiled template specializations only when the shared library has been enabled through cmake (such as by specifying the `compiled` component in `find_package`):
-```c++
-#ifdef RAFT_COMPILED
-#include <raft/distance/distance.cuh>
-#include <raft/distance/specializations.cuh>
-#endif
-```
+The pre-compiled libraries contain template instantiations for commonly used types, such as single- and double-precision floating-point. By default, these are used automatically when the `RAFT_COMPILED` macro is defined during compilation. This definition is automatically added by CMake.
 
 ### Building RAFT C++ from source in cmake
 
diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md
index 99712fc996..c206808d21 100644
--- a/docs/source/developer_guide.md
+++ b/docs/source/developer_guide.md
@@ -291,6 +291,97 @@ Sometimes, we need to temporarily change the log pattern (eg: for reporting deci
 
 4. Before creating a new primitive, check to see if one exists already. If one exists but the API isn't flexible enough to include your use-case, consider first refactoring the existing primitive. If that is not possible without an extreme number of changes, consider how the public API could be made more flexible. If the new primitive is different enough from all existing primitives, consider whether an existing public API could invoke the new primitive as an option or argument. If the new primitive is different enough from what exists already, add a header for the new public API function to the appropriate subdirectory and namespace.
 
+## Header organization of expensive function templates
+
+RAFT is a heavily templated library. Several core functions are expensive to compile and we want to prevent duplicate compilation of this functionality. To limit build time, RAFT provides a precompiled library (libraft.so) where expensive function templates are instantiated for the most commonly used template parameters. To prevent (1) accidental instantiation of these templates and (2) unnecessary dependency on the internals of these templates, we use a split header structure and define macros to control template instantiation. This section describes the macros and header structure.
+
+**Macros.** We define the macros `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY`. The `RAFT_COMPILED` macro is defined by `CMake` when compiling code that (1) is part of `libraft.so` or (2) is linked with `libraft.so`. It indicates that a precompiled `libraft.so` is present at runtime.
+
+The `RAFT_EXPLICIT_INSTANTIATE_ONLY` macro is defined by `CMake` during compilation of `libraft.so` itself. When defined, it indicates that implicit instantiations of expensive function templates are forbidden (they result in a compiler error). In the RAFT project, we additionally define this macro during compilation of the tests and benchmarks. 
+
+Below, we summarize which combinations of `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY` are used in practice and what the effect of the combination is. 
+
+| RAFT_COMPILED | RAFT_EXPLICIT_INSTANTIATE_ONLY | Which targets                                                                                        |
+|---------------|--------------------------------|------------------------------------------------------------------------------------------------------|
+| defined       | defined                        | `raft::compiled`, RAFT tests, RAFT benchmarks                                                        |
+| defined       |                                | Downstream libraries depending on `libraft` like cuML, cuGraph.                                      |
+|               |                                | Downstream libraries depending on `libraft-headers` like cugraph-ops.                                |
+
+
+| RAFT_COMPILED | RAFT_EXPLICIT_INSTANTIATE_ONLY | Effect                                                                                                |
+|---------------|--------------------------------|-------------------------------------------------------------------------------------------------------|
+| defined       | defined                        | Templates are precompiled. Compiler error on accidental instantiation of expensive function template. |
+| defined       |                                | Templates are precompiled. Implicit instantiation allowed.                                            |
+|               |                                | Nothing precompiled. Implicit instantiation allowed.                                                  |
+|               | defined                        | Avoid this: nothing precompiled. Compiler error on any instantiation of expensive function template.  |
+
+
+
+**Header organization.** Any header file that defines an expensive function template (say `expensive.cuh`) should be split into three parts: `expensive.cuh`, `expensive-inl.cuh`, and `expensive-ext.cuh`. The file `expensive-inl.cuh` ("inl" for "inline") contains the template definitions, i.e., the actual code. The file `expensive.cuh` includes one or both of the other two files, depending on whether the `RAFT_COMPILED` and `RAFT_EXPLICIT_INSTANTIATE_ONLY` macros are defined. The file `expensive-ext.cuh` contains `extern template` instantiations. In addition, if `RAFT_EXPLICIT_INSTANTIATE_ONLY` is defined, it contains template definitions to ensure that a compiler error is raised in case of accidental instantiation.
+
+The dispatching by `expensive.cuh` is performed as follows:
+``` c++
+#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY
+// If implicit instantiation is allowed, include template definitions.
+#include "expensive-inl.cuh"
+#endif
+
+#ifdef RAFT_COMPILED
+// Include extern template instantiations when RAFT is compiled.
+#include "expensive-ext.cuh"
+#endif
+```
+
+The file `expensive-inl.cuh` is unchanged:
+``` c++
+namespace raft {
+template <typename T>
+void expensive(T arg) {
+  // .. function body
+}
+} // namespace raft
+```
+
+The file `expensive-ext.cuh` contains the following:
+``` c++
+#include <raft/util/raft_explicit.cuh> // RAFT_EXPLICIT
+
+#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY
+namespace raft {
+// (1) define templates to raise an error in case of accidental instantiation 
+template <typename T> void expensive(T arg) RAFT_EXPLICIT;
+} // namespace raft
+#endif //RAFT_EXPLICIT_INSTANTIATE_ONLY
+
+// (2) Provide extern template instantiations.
+extern template void raft::expensive<int>(int);
+extern template void raft::expensive<float>(float);
+```
+
+This header has two responsibilities: (1) define templates to raise an error in case of accidental instantiation and (2) provide `extern template` instantiations.
+First, if `RAFT_EXPLICIT_INSTANTIATE_ONLY` is defined, `expensive` is defined. This is done for two reasons: (1) to give a definition, because the definition in `expensive-inl.cuh` was skipped and (2) to indicate that the template should be explicitly instantiated by tagging it with the `RAFT_EXPLICIT` macro. This macro defines the function body, and it ensures that an informative error message is generated when an implicit instantiation erroneously occurs. Finally, the `extern template` instantiations are listed.
+
+To actually generate the code for the template instances, the file `src/expensive.cu` contains the following. Note that the only difference between the extern template instantiations in `expensive-ext.cuh` and these lines is the removal of the word `extern`:
+
+``` c++
+#include <raft/expensive-inl.cuh>
+
+template void raft::expensive<int>(int);
+template void raft::expensive<float>(float);
+```
+
+**Design considerations**: 
+
+1. In the `-ext.cuh` header, do not include implementation headers. Only include function parameter types and types that are used to instantiate the templates. If a primitive takes custom parameter types, define them in a separate header called `<primitive_name>_types.hpp` (see [Common Design Considerations](https://github.com/rapidsai/raft/blob/7b065aff81a0b1976e2a9e2f3de6690361a1111b/docs/source/developer_guide.md#common-design-considerations)).
+
+2. Keep docstrings in the `-inl.cuh` header, as it is closer to the code. Remove docstrings from template definitions in the `-ext.cuh` header. Make sure to explicitly include public APIs in the RAFT API docs. That is, add `#include <raft/expensive.cuh>` to the docs in `docs/source/cpp_api/expensive.rst` (instead of `#include <raft/expensive-inl.cuh>`).
+
+3. The order of inclusion in `expensive.cuh` is extremely important. If `RAFT_EXPLICIT_INSTANTIATE_ONLY` is not defined, but `RAFT_COMPILED` is defined, then we must include the template definitions before the `extern template` instantiations.
+
+4. If a header file defines multiple expensive templates, it can be that one of them is not instantiated. In this case, **do define** the template with `RAFT_EXPLICIT` in the `-ext` header. This way, when the template is instantiated, the developer gets a helpful error message instead of a confusing "function not found".
+
+This header structure was proposed in [issue #1416](https://github.com/rapidsai/raft/issues/1416), which contains more background on the motivation of this structure and the mechanics of C++ template instantiation. 
+
 ## Testing
 
 It's important for RAFT to maintain a high test coverage of the public APIs in order to minimize the potential for downstream projects to encounter unexpected build or runtime behavior as a result of changes.
diff --git a/docs/source/using_libraft.md b/docs/source/using_libraft.md
index f4f966f2c8..ef055184e7 100644
--- a/docs/source/using_libraft.md
+++ b/docs/source/using_libraft.md
@@ -1,59 +1,64 @@
 # Using The Pre-Compiled Binary
 
-At its core, RAFT is a header-only template library, which makes it very powerful in that APIs can be called with various different combinations of data types and only the templates which are actually used will be compiled into your binaries. This increased flexibility comes with a drawback that all the APIs need to be declared inline and thus calls which are made frequently in your code could be compiled again each source file for which they are invoked.
+At its core, RAFT is a header-only template library, which makes it very powerful in that APIs can be called with various different combinations of data types and only the templates which are actually used will be compiled into your binaries. This increased flexibility comes with a drawback that all the APIs need to be declared inline and thus calls which are made frequently in your code could be compiled again in each source file for which they are invoked.
 
-For most functions, this overhead is pretty minimal and not noticeable but some of RAFT's APIs consist of very complex hierarchies of function calls that ultimately end up dispatching to device code that's executed on the GPU. The compile times for these APIs may still be bearable when compiling for only a single compute architecture but could end up becoming extremely slow to compile for all of the supported architectures at once.
+For most functions, compile-time overhead is minimal but some of RAFT's APIs take a substantial time to compile. As a rule of thumb, most functionality in `raft::distance`, `raft::neighbors`, and `raft::cluster` is expensive to compile and most functionality in other namespaces has little compile-time overhead.
 
-There are three ways to solve this problem and speed up compile times:
-1. Continue to use RAFT as a header-only library and create a CUDA source file in your project to explicitly instantiate the templates which are slow to compile. This can be tedious and will still require compiling the slow code at least once, but it's the most flexible option if you are using types that aren't already compiled into `libraft`
-2. If you are able to use one of the template types that are already being compiled into `libraft`, you can use the pre-compiled template specializations, which I will describe in more detail in the following section.
-3. If you would like to use RAFT but either cannot or would prefer not to compile any CUDA code yourself, you can simply add `libraft` to your link libraries and use the growing set of runtime APIs.
+There are three ways to speed up compile times:
 
-## Using Template Specializations
+1. Continue to use RAFT as a header-only library and create a CUDA source file
+   in your project to explicitly instantiate the templates which are slow to
+   compile. This can be tedious and will still require compiling the slow code
+   at least once, but it's the most flexible option if you are using types that
+   aren't already compiled into `libraft`.
 
-As mentioned above, the pre-compiled template instantiations can save a lot of time if you are able to use the type combinations for the templates which are already specialized in the `libraft` binary. This will, of course, mean that you will need to add `libraft` to your link libraries.
+2. If you are able to use one of the template types that are already being
+   compiled into `libraft`, you can use the pre-compiled template
+   instantiations, which are described in more detail in the following section.
 
-At the top level of each namespace containing pre-compiled template specializations is a header file called `specializations.cuh`. This header file includes `extern template` directives for all the specializations which are compiled into libraft. As an example, including `raft/neighbors/specializations.cuh` in one of your source files will effectively tell the compiler to skip over any of the template specializations that are already compiled into the `libraft` binary.
+3. If you would like to use RAFT but either cannot or would prefer not to
+   compile any CUDA code yourself, you can simply add `libraft` to your link
+   libraries and use the growing set of `raft::runtime` APIs.
 
-### How do I verify template specializations didn't compile into my binary?
+### How do I verify template instantiations didn't compile into my binary?
 
-Which specializations were chosen to instantiations were based on compile time analysis and reuse. This means you can't assume that all specializations are for the public API itself. Take the following example in `raft/neighbors/specializations/detail/ivf_pq_compute_similarity.cuh`:
+To verify that you are not accidentally instantiating templates that have not been pre-compiled in RAFT, set the `RAFT_EXPLICIT_INSTANTIATE_ONLY` macro. This only works if you are linking with the pre-compiled libraft (i.e., when `RAFT_COMPILED` has been defined). To check if, for instance, `raft::distance::distance` has been precompiled with specific template arguments, you can set `RAFT_EXPLICIT_INSTANTIATE_ONLY` at the top of the file you are compiling, as in the following example:
 
 ```c++
-namespace raft::neighbors::ivf_pq::detail {
-
-namespace {
-using fp8s_t = fp_8bit<5, true>;
-using fp8u_t = fp_8bit<5, false>;
-}  // namespace
-
-#define RAFT_INST(OutT, LutT)                                                                     \
-  extern template auto get_compute_similarity_kernel<OutT, LutT, true, true>(uint32_t, uint32_t)  \
-    ->compute_similarity_kernel_t<OutT, LutT>;                                                    \
-  extern template auto get_compute_similarity_kernel<OutT, LutT, true, false>(uint32_t, uint32_t) \
-    ->compute_similarity_kernel_t<OutT, LutT>;                                                    \
-  extern template auto get_compute_similarity_kernel<OutT, LutT, false, true>(uint32_t, uint32_t) \
-    ->compute_similarity_kernel_t<OutT, LutT>;
-
-#define RAFT_INST_ALL_OUT_T(LutT) \
-  RAFT_INST(float, LutT)          \
-  RAFT_INST(half, LutT)
-
-RAFT_INST_ALL_OUT_T(float)
-RAFT_INST_ALL_OUT_T(half)
-RAFT_INST_ALL_OUT_T(fp8s_t)
-RAFT_INST_ALL_OUT_T(fp8u_t)
-
-#undef RAFT_INST
-#undef RAFT_INST_ALL_OUT_T
-
-}  // namespace raft::neighbors::ivf_pq::detail
-```
 
-We can see here that the function `raft::neighbors::ivf_pq::detail::get_compute_similarity_kernel` is being instantiated for the cartesian product of `OutT={float, half, fp8s_t, fp8u_t}` and `LutT={float, half}`. After linking against the `libraft` binary and including `raft/neighbors/specializations.cuh` in your source file, you can invoke the `raft::neighbors::ivf_pq` functions and compile your code. If the specializations are working, you should be able to use `nm -g -C --defined-only /path/to/your/binary | grep raft::neighbors::ivf_pq::detail::get_compute_similarity::kernel` and you shouldn't see any results, because those symbols should be coming from the `libraft` binary and skipped from compiling into your binary.
+#ifdef RAFT_COMPILED
+#define RAFT_EXPLICIT_INSTANTIATE_ONLY
+#endif
+
+#include <cstdint>
+#include <raft/core/device_resources.hpp>
+#include <raft/distance/distance.cuh>
+
+int main()
+{
+  raft::resources handle{};
+
+  // Change IdxT to uint64_t and you will get an error because you are
+  // instantiating a template that has not been pre-compiled.
+  using IdxT = int;
+
+  const float* x = nullptr;
+  const float* y = nullptr;
+  float* out     = nullptr;
+  int m          = 1024;
+  int n          = 1024;
+  int k          = 1024;
+  bool row_major = true;
+  raft::distance::distance<raft::distance::DistanceType::L1, float, float, float, IdxT>(
+    handle, x, y, out, m, n, k, row_major, 2.0f);
+}
+```
 
 ## Runtime APIs
 
-RAFT contains a growing list of runtime APIs that, unlike the pre-compiled template specializations, allow you to link against `libraft` and invoke RAFT directly from `cpp` files. The benefit to RAFT's runtime APIs are two-fold- unlike the template specializations, which still require your code be compiled with the CUDA compiler (`nvcc`), the `runtime` APIs are the lightweight wrappers which enable `pylibraft`.
+RAFT contains a growing list of runtime APIs that, unlike the pre-compiled
+template instantiations, allow you to link against `libraft` and invoke RAFT
+directly from `cpp` files. The benefit to RAFT's runtime APIs is that they can
+be used from code that is compiled with a `c++` compiler (rather than the CUDA
+compiler `nvcc`). This enables the `runtime` APIs to power `pylibraft`.
 
-Similar to the pre-compiled template specializations, RAFT's runtime APIs 
\ No newline at end of file

From a44ca96c5cddec7ca67b510f3c21163d3958bc7e Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Fri, 28 Apr 2023 14:46:44 -0400
Subject: [PATCH 34/78] Revert shared-action-workflows pin (#1475)

This PR reverts the shared-action-workflows branch pin (`py-39`, which was used to drop Python 3.8) back to `branch-23.06`.

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1475
---
 .github/workflows/build.yaml | 16 ++++++++--------
 .github/workflows/pr.yaml    | 22 +++++++++++-----------
 .github/workflows/test.yaml  |  8 ++++----
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index aea876d89c..0f5f84c158 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -57,7 +57,7 @@ jobs:
     if: github.ref_type == 'branch' && github.event_name == 'push'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
       build_type: branch
       node_type: "gpu-v100-latest-1"
@@ -66,7 +66,7 @@ jobs:
       run_script: "ci/build_docs.sh"
   wheel-build-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
   wheel-publish-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-publish-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -100,7 +100,7 @@ jobs:
   wheel-publish-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index bc4ae5891c..c51d5c0a34 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -23,41 +23,41 @@ jobs:
       - wheel-build-raft-dask
       - wheel-tests-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06
     with:
       enable_check_generated_files: false
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06
     with:
       build_type: pull-request
       node_type: cpu16
   conda-cpp-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
     with:
       build_type: pull-request
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
     with:
       build_type: pull-request
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -67,7 +67,7 @@ jobs:
   wheel-build-pylibraft:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -76,7 +76,7 @@ jobs:
   wheel-tests-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-tests-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: raft_dask
@@ -98,7 +98,7 @@ jobs:
   wheel-tests-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: raft_dask
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index d8add3af5a..05e96a6dff 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -24,7 +24,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -32,7 +32,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -44,7 +44,7 @@ jobs:
       test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
   wheel-tests-raft-dask:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@py-39
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}

From 9200b41b674b59b0bd4d20e795f8cd610213141b Mon Sep 17 00:00:00 2001
From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com>
Date: Tue, 2 May 2023 07:13:24 -0500
Subject: [PATCH 35/78] enable local cache hits (#1478)

---
 conda/recipes/libraft/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml
index 83468d90af..b89fcfb788 100644
--- a/conda/recipes/libraft/meta.yaml
+++ b/conda/recipes/libraft/meta.yaml
@@ -36,6 +36,7 @@ outputs:
         - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64]
         - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64]
         - SCCACHE_S3_USE_SSL
+        - SCCACHE_S3_NO_CREDENTIALS
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
       ignore_run_exports_from:

From 6dc5c71424a7bac37e8df008fa923dafc64a193d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 3 May 2023 13:05:45 -0700
Subject: [PATCH 36/78] Pin to scikit-build<0.17.2 (#1487)

The latest scikit-build version breaks rapids-cython

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1487
---
 conda/environments/all_cuda-118_arch-x86_64.yaml       | 2 +-
 conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +-
 dependencies.yaml                                      | 2 +-
 python/pylibraft/pyproject.toml                        | 2 +-
 python/raft-dask/pyproject.toml                        | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index d192aefa7c..8015122634 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -45,7 +45,7 @@ dependencies:
 - pytest-cov
 - recommonmark
 - rmm==23.6.*
-- scikit-build>=0.13.1
+- scikit-build>=0.13.1,<0.17.2
 - scikit-learn
 - scipy
 - sphinx-copybutton
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
index 2013c16fa4..3ea560025e 100644
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -32,6 +32,6 @@ dependencies:
 - nccl>=2.9.9
 - ninja
 - nlohmann_json>=3.11.2
-- scikit-build>=0.13.1
+- scikit-build>=0.13.1,<0.17.2
 - sysroot_linux-64==2.17
 name: bench_ann_cuda-118_arch-x86_64
diff --git a/dependencies.yaml b/dependencies.yaml
index bc0fbd409e..5b691d1464 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -109,7 +109,7 @@ dependencies:
           - cmake>=3.23.1,!=3.25.0
           - cython>=0.29,<0.30
           - ninja
-          - scikit-build>=0.13.1
+          - scikit-build>=0.13.1,<0.17.2
       - output_types: [conda]
         packages:
           - c-compiler
diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml
index 0fb311ae3b..3c600324ce 100644
--- a/python/pylibraft/pyproject.toml
+++ b/python/pylibraft/pyproject.toml
@@ -20,7 +20,7 @@ requires = [
     "cython>=0.29,<0.30",
     "ninja",
     "rmm==23.6.*",
-    "scikit-build>=0.13.1",
+    "scikit-build>=0.13.1,<0.17.2",
     "setuptools",
     "wheel",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml
index 602148f758..ac6a35b5ab 100644
--- a/python/raft-dask/pyproject.toml
+++ b/python/raft-dask/pyproject.toml
@@ -18,7 +18,7 @@ requires = [
     "cmake>=3.23.1,!=3.25.0",
     "cython>=0.29,<0.30",
     "ninja",
-    "scikit-build>=0.13.1",
+    "scikit-build>=0.13.1,<0.17.2",
     "setuptools",
     "wheel",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

From 2dfbe83081a54f54820ad82e35d939c882cb09a1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 3 May 2023 14:40:11 -0700
Subject: [PATCH 37/78] Build wheels using new single image workflow (#1477)

Update wheel builds to use the new version of the workflow.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/raft/pull/1477
---
 .github/workflows/build.yaml |  8 ++++----
 .github/workflows/pr.yaml    | 10 +++++-----
 .github/workflows/test.yaml  |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 0f5f84c158..00a3aac95a 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -66,7 +66,7 @@ jobs:
       run_script: "ci/build_docs.sh"
   wheel-build-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
   wheel-publish-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@manylinux_v2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-publish-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -100,7 +100,7 @@ jobs:
   wheel-publish-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@manylinux_v2
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c51d5c0a34..4c4c545a78 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -67,7 +67,7 @@ jobs:
   wheel-build-pylibraft:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -76,7 +76,7 @@ jobs:
   wheel-tests-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -88,17 +88,17 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-tests-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
     with:
       build_type: pull-request
       package-name: raft_dask
       package-dir: python/raft-dask
-      before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-wheelhouse"
+      before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft && python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl"
       skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
   wheel-tests-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
     with:
       build_type: pull-request
       package-name: raft_dask
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 05e96a6dff..17b2d6f83b 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -32,7 +32,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -44,7 +44,7 @@ jobs:
       test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
   wheel-tests-raft-dask:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}

From 3eb30e45845b3c6dd2bd7c145172b567b66afbf6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 4 May 2023 07:15:17 -0700
Subject: [PATCH 38/78] Update cupy dependency (#1488)

Switch to requiring cupy 12

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/raft/pull/1488
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 dependencies.yaml                                | 7 ++++++-
 python/pylibraft/pyproject.toml                  | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 8015122634..5ba52c4fad 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -15,7 +15,7 @@ dependencies:
 - cuda-profiler-api=11.8.86
 - cuda-python >=11.7.1,<12.0
 - cudatoolkit=11.8
-- cupy
+- cupy>=12.0.0
 - cxx-compiler
 - cython>=0.29,<0.30
 - dask-core==2023.3.2
diff --git a/dependencies.yaml b/dependencies.yaml
index 5b691d1464..630ba99c59 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -286,6 +286,11 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - cupy
           - scikit-learn
           - scipy
+      - output_types: conda
+        packages:
+          - cupy>=12.0.0
+      - output_types: pyproject
+        packages:
+          - cupy-cuda11x>=12.0.0
diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml
index 3c600324ce..dc3095e2d1 100644
--- a/python/pylibraft/pyproject.toml
+++ b/python/pylibraft/pyproject.toml
@@ -50,7 +50,7 @@ classifiers = [
 
 [project.optional-dependencies]
 test = [
-    "cupy",
+    "cupy-cuda11x>=12.0.0",
     "pytest",
     "pytest-cov",
     "scikit-learn",

From 212cfe9d34aa356662d6f4665cb6246e65117deb Mon Sep 17 00:00:00 2001
From: Tamas Bela Feher <tfeher@nvidia.com>
Date: Thu, 4 May 2023 16:15:56 +0200
Subject: [PATCH 39/78] Add missing ext declaration for log detail::format
 (#1482)

The `format` function is used by [debug and trace loggers](https://github.com/rapidsai/raft/blob/a44ca96c5cddec7ca67b510f3c21163d3958bc7e/cpp/include/raft/core/logger-macros.hpp#L44-L75). PR #1469 restructured the logger headers but did not expose `detail::format` when `RAFT_EXPLICIT_INSTANTIATE_ONLY` is defined. This PR fixes that.

Authors:
  - Tamas Bela Feher (https://github.com/tfeher)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Allard Hendriksen (https://github.com/ahendriksen)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1482
---
 cpp/include/raft/core/logger-ext.hpp | 10 +++++++---
 cpp/include/raft/core/logger-inl.hpp |  2 +-
 cpp/test/core/logger.cpp             |  5 +++++
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/cpp/include/raft/core/logger-ext.hpp b/cpp/include/raft/core/logger-ext.hpp
index 69688560c7..8fd29cf1d6 100644
--- a/cpp/include/raft/core/logger-ext.hpp
+++ b/cpp/include/raft/core/logger-ext.hpp
@@ -15,15 +15,19 @@
  */
 #pragma once
 
-#include <memory>         // std::unique_ptr
-#include <string>         // std::string
-#include <unordered_map>  // std::unordered_map
+#include <memory>                       // std::unique_ptr
+#include <raft/core/detail/macros.hpp>  // RAFT_INLINE_CONDITIONAL
+#include <string>                       // std::string
+#include <unordered_map>                // std::unordered_map
 
 namespace raft {
 
 static const std::string RAFT_NAME = "raft";
 static const std::string default_log_pattern("[%L] [%H:%M:%S.%f] %v");
 
+namespace detail {
+RAFT_INLINE_CONDITIONAL std::string format(const char* fmt, ...);
+}
 /**
  * @brief The main Logging class for raft library.
  *
diff --git a/cpp/include/raft/core/logger-inl.hpp b/cpp/include/raft/core/logger-inl.hpp
index a90023d01f..fcfa1f1333 100644
--- a/cpp/include/raft/core/logger-inl.hpp
+++ b/cpp/include/raft/core/logger-inl.hpp
@@ -54,7 +54,7 @@ inline std::string format(const char* fmt, va_list& vl)
   return std::string(buf.data());
 }
 
-inline std::string format(const char* fmt, ...)
+RAFT_INLINE_CONDITIONAL std::string format(const char* fmt, ...)
 {
   va_list vl;
   va_start(vl, fmt);
diff --git a/cpp/test/core/logger.cpp b/cpp/test/core/logger.cpp
index 3f29c9f12c..d7f001a700 100644
--- a/cpp/test/core/logger.cpp
+++ b/cpp/test/core/logger.cpp
@@ -14,6 +14,11 @@
  * limitations under the License.
  */
 
+// We set RAFT_ACTIVE_LEVEL to a value that would enable testing trace and debug logs
+// (otherwise trace and debug logs are disabled by default).
+#undef RAFT_ACTIVE_LEVEL
+#define RAFT_ACTIVE_LEVEL 6
+
 #include <gtest/gtest.h>
 #include <raft/core/logger.hpp>
 #include <string>

From 641f164e9442c28694eec23391905680585ab7a9 Mon Sep 17 00:00:00 2001
From: Tamas Bela Feher <tfeher@nvidia.com>
Date: Fri, 5 May 2023 01:11:41 +0200
Subject: [PATCH 40/78] Remove pool_size() calls from debug printouts (#1484)

The return type of `get_pool_memory_resource` was changed in #1469 from `pool_memory_resource` to `device_memory_resource`. There are debug logs in the code ([example](https://github.com/rapidsai/raft/blob/a44ca96c5cddec7ca67b510f3c21163d3958bc7e/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh#L1328-L1329)) that query the pool size and would therefore fail to compile when debug logging is enabled. This PR removes the `pool_size()` calls, so that the code can be compiled with debug mode on.

Authors:
  - Tamas Bela Feher (https://github.com/tfeher)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1484
---
 cpp/include/raft/cluster/detail/kmeans_balanced.cuh    |  2 +-
 cpp/include/raft/matrix/detail/select_radix.cuh        |  7 ++-----
 cpp/include/raft/matrix/detail/select_warpsort.cuh     |  5 +----
 .../raft/neighbors/detail/cagra/cagra_build.cuh        |  5 +----
 .../raft/neighbors/detail/ivf_flat_search-inl.cuh      |  2 +-
 cpp/include/raft/neighbors/detail/ivf_pq_build.cuh     | 10 ++--------
 cpp/include/raft/neighbors/detail/ivf_pq_search.cuh    |  2 +-
 cpp/include/raft/util/memory_pool-inl.hpp              |  2 +-
 8 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
index 4f7cae1ad9..eb89ebe402 100644
--- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
@@ -976,7 +976,7 @@ void build_hierarchical(const raft::device_resources& handle,
     raft::get_pool_memory_resource(device_memory, mem_per_row * size_t(max_minibatch_size));
   if (pool_guard) {
     RAFT_LOG_DEBUG("build_hierarchical: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
+                   mem_per_row * size_t(max_minibatch_size));
   }
 
   // Precompute the L2 norm of the dataset if relevant.
diff --git a/cpp/include/raft/matrix/detail/select_radix.cuh b/cpp/include/raft/matrix/detail/select_radix.cuh
index 7ac40ac0eb..b7d02d6b52 100644
--- a/cpp/include/raft/matrix/detail/select_radix.cuh
+++ b/cpp/include/raft/matrix/detail/select_radix.cuh
@@ -778,7 +778,7 @@ void radix_topk(const T* in,
   auto pool_guard = raft::get_pool_memory_resource(mr, mem_req);
   if (pool_guard) {
     RAFT_LOG_DEBUG("radix::select_k: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
+                   mem_req);
   }
 
   rmm::device_uvector<Counter<T, IdxT>> counters(max_chunk_size, stream, mr);
@@ -1031,10 +1031,7 @@ void radix_topk_one_block(const T* in,
                                    max_chunk_size * len * 2 * (sizeof(T) + sizeof(IdxT)) +
                                      256 * 4  // might need extra memory for alignment
     );
-  if (pool_guard) {
-    RAFT_LOG_DEBUG("radix::select_k: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
-  }
+  if (pool_guard) { RAFT_LOG_DEBUG("radix::select_k: using pool memory resource"); }
 
   rmm::device_uvector<T> buf1(len * max_chunk_size, stream, mr);
   rmm::device_uvector<IdxT> idx_buf1(len * max_chunk_size, stream, mr);
diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh
index c19e9391ce..dc86a04733 100644
--- a/cpp/include/raft/matrix/detail/select_warpsort.cuh
+++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh
@@ -990,10 +990,7 @@ void select_k_(int num_of_block,
 {
   auto pool_guard = raft::get_pool_memory_resource(
     mr, num_of_block * k * batch_size * 2 * std::max(sizeof(T), sizeof(IdxT)));
-  if (pool_guard) {
-    RAFT_LOG_DEBUG("warpsort::select_k: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
-  }
+  if (pool_guard) { RAFT_LOG_DEBUG("warpsort::select_k: using pool memory resource"); }
 
   rmm::device_uvector<T> tmp_val(num_of_block * k * batch_size, stream, mr);
   rmm::device_uvector<IdxT> tmp_idx(num_of_block * k * batch_size, stream, mr);
diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
index 4d63fb7999..54c806ba13 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
@@ -130,10 +130,7 @@ void build_knn_graph(raft::device_resources const& res,
 
   rmm::mr::device_memory_resource* device_memory = nullptr;
   auto pool_guard = raft::get_pool_memory_resource(device_memory, 1024 * 1024);
-  if (pool_guard) {
-    RAFT_LOG_DEBUG("ivf_pq using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
-  }
+  if (pool_guard) { RAFT_LOG_DEBUG("ivf_pq using pool memory resource"); }
 
   raft::spatial::knn::detail::utils::batch_load_iterator<DataT> vec_batches(dataset.data_handle(),
                                                                             dataset.extent(0),
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
index 89a4597acf..c364118fdd 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
@@ -216,7 +216,7 @@ inline void search(raft::device_resources const& handle,
   auto pool_guard = raft::get_pool_memory_resource(mr, n_queries * n_probes * k * 16);
   if (pool_guard) {
     RAFT_LOG_DEBUG("ivf_flat::search: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
+                   n_queries * n_probes * k * 16ull);
   }
 
   return search_impl<T, float, IdxT>(handle,
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
index 208f7fd875..b17b3a3559 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
@@ -1324,10 +1324,7 @@ void extend(raft::device_resources const& handle,
 
   rmm::mr::device_memory_resource* device_memory = nullptr;
   auto pool_guard = raft::get_pool_memory_resource(device_memory, 1024 * 1024);
-  if (pool_guard) {
-    RAFT_LOG_DEBUG("ivf_pq::extend: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
-  }
+  if (pool_guard) { RAFT_LOG_DEBUG("ivf_pq::extend: using pool memory resource"); }
 
   rmm::mr::managed_memory_resource managed_memory_upstream;
   rmm::mr::pool_memory_resource<rmm::mr::managed_memory_resource> managed_memory(
@@ -1536,10 +1533,7 @@ auto build(raft::device_resources const& handle,
 
     rmm::mr::device_memory_resource* device_memory = nullptr;
     auto pool_guard = raft::get_pool_memory_resource(device_memory, 1024 * 1024);
-    if (pool_guard) {
-      RAFT_LOG_DEBUG("ivf_pq::build: using pool memory resource with initial size %zu bytes",
-                     pool_guard->pool_size());
-    }
+    if (pool_guard) { RAFT_LOG_DEBUG("ivf_pq::build: using pool memory resource"); }
 
     rmm::mr::managed_memory_resource managed_memory_upstream;
     rmm::mr::pool_memory_resource<rmm::mr::managed_memory_resource> managed_memory(
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
index 0aa2862cf4..c1c15d3424 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
@@ -767,7 +767,7 @@ inline void search(raft::device_resources const& handle,
   auto pool_guard = raft::get_pool_memory_resource(mr, n_queries * n_probes * k * 16);
   if (pool_guard) {
     RAFT_LOG_DEBUG("ivf_pq::search: using pool memory resource with initial size %zu bytes",
-                   pool_guard->pool_size());
+                   n_queries * n_probes * k * 16ull);
   }
 
   // Maximum number of query vectors to search at the same time.
diff --git a/cpp/include/raft/util/memory_pool-inl.hpp b/cpp/include/raft/util/memory_pool-inl.hpp
index a227b6e53f..070c8f4e30 100644
--- a/cpp/include/raft/util/memory_pool-inl.hpp
+++ b/cpp/include/raft/util/memory_pool-inl.hpp
@@ -36,7 +36,7 @@ namespace raft {
  *   void my_func(..., size_t n, rmm::mr::device_memory_resource* mr = nullptr) {
  *     auto pool_guard = raft::get_pool_memory_resource(mr, 2 * n * sizeof(float));
  *     if (pool_guard){
- *       RAFT_LOG_INFO("Created a pool %zu bytes", pool_guard->pool_size());
+ *       RAFT_LOG_INFO("Created a pool");
  *     } else {
  *       RAFT_LOG_INFO("Using the current default or explicitly passed device memory resource");
  *     }

From aa9d6866a80b250f6a3696b188bf596f7b9a4a76 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Fri, 5 May 2023 09:22:01 -0400
Subject: [PATCH 41/78] Using `raft::resources` across `raft::random` (#1420)

Eventually we need to do this across all the headers in the codebase so that users have a choice as to whether they want to use `raft::device_resources` (which implicitly depends on the cuda math libs and thrust) or whether they just want to use `raft::resources` (which is agnostic of the resources it contains and allows the primitives themselves to levy the dependency requirements).

cc @MatthiasKohl this *should* allow cugraph-ops to completely remove the math libs dependency (though the conda recipes will also need to be changed to depend on `libraft-headers-only` and the cmake changed to turn off the CTK math libs dependency).

**NOTE**: Before this PR is merged, it's important that it be tested w/ cugraph/cuml at the very least to spot any cases where the `device_resources.hpp` include was being assumed transitively from the RAFT functions.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/raft/pull/1420
---
 cpp/include/raft/core/mdarray.hpp             |   5 +-
 cpp/include/raft/linalg/detail/qr.cuh         |  12 +-
 cpp/include/raft/linalg/detail/transpose.cuh  |  20 +--
 cpp/include/raft/linalg/qr.cuh                |  19 ++-
 cpp/include/raft/linalg/transpose.cuh         |   5 +-
 .../raft/random/detail/make_regression.cuh    |  13 +-
 .../random/detail/multi_variable_gaussian.cuh |  36 +++--
 .../detail/rmat_rectangular_generator.cuh     |  22 ++-
 .../random/detail/rng_impl_deprecated.cuh     |   4 +-
 cpp/include/raft/random/make_blobs.cuh        |   6 +-
 cpp/include/raft/random/make_regression.cuh   |   8 +-
 .../raft/random/multi_variable_gaussian.cuh   |   5 +-
 cpp/include/raft/random/permute.cuh           |  16 +-
 .../random/rmat_rectangular_generator.cuh     |  13 +-
 cpp/include/raft/random/rng.cuh               | 151 +++++++++---------
 .../random/sample_without_replacement.cuh     |   7 +-
 cpp/test/core/temporary_device_buffer.cu      |   1 +
 cpp/test/distance/dist_adj.cu                 |   1 +
 cpp/test/linalg/transpose.cu                  |   1 +
 cpp/test/matrix/columnSort.cu                 |   1 +
 cpp/test/random/make_blobs.cu                 |   2 +
 cpp/test/random/make_regression.cu            |   2 +
 cpp/test/random/multi_variable_gaussian.cu    |   2 +
 cpp/test/random/permute.cu                    |   2 +
 cpp/test/random/rmat_rectangular_generator.cu |   2 +
 cpp/test/random/rng.cu                        |   2 +
 cpp/test/random/rng_discrete.cu               |   2 +
 cpp/test/random/rng_int.cu                    |   1 +
 cpp/test/random/sample_without_replacement.cu |   1 +
 cpp/test/stats/histogram.cu                   |   1 +
 cpp/test/stats/minmax.cu                      |   1 +
 31 files changed, 214 insertions(+), 150 deletions(-)

diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp
index 618e307f5d..c7350a978c 100644
--- a/cpp/include/raft/core/mdarray.hpp
+++ b/cpp/include/raft/core/mdarray.hpp
@@ -197,8 +197,9 @@ class mdarray
 #endif  // RAFT_MDARRAY_CTOR_CONSTEXPR
 
   /**
-   * @brief The only constructor that can create storage, this is to make sure CUDA stream is being
-   * used.
+   * @brief The only constructor that can create storage, raft::resources is accepted
+   * so that the device implementation can make sure the relevant CUDA stream is
+   * being used for allocation.
    */
   RAFT_MDARRAY_CTOR_CONSTEXPR mdarray(raft::resources const& handle,
                                       mapping_type const& m,
diff --git a/cpp/include/raft/linalg/detail/qr.cuh b/cpp/include/raft/linalg/detail/qr.cuh
index 4cba028d87..bc7c551d89 100644
--- a/cpp/include/raft/linalg/detail/qr.cuh
+++ b/cpp/include/raft/linalg/detail/qr.cuh
@@ -18,6 +18,8 @@
 
 #include "cublas_wrappers.hpp"
 #include "cusolver_wrappers.hpp"
+#include <raft/core/resource/cusolver_dn_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/matrix.cuh>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
@@ -42,10 +44,10 @@ namespace detail {
  */
 template <typename math_t>
 void qrGetQ_inplace(
-  raft::device_resources const& handle, math_t* Q, int n_rows, int n_cols, cudaStream_t stream)
+  raft::resources const& handle, math_t* Q, int n_rows, int n_cols, cudaStream_t stream)
 {
   RAFT_EXPECTS(n_rows >= n_cols, "QR decomposition expects n_rows >= n_cols.");
-  cusolverDnHandle_t cusolver = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolver = resource::get_cusolver_dn_handle(handle);
 
   rmm::device_uvector<math_t> tau(n_cols, stream);
   RAFT_CUDA_TRY(cudaMemsetAsync(tau.data(), 0, sizeof(math_t) * n_cols, stream));
@@ -83,7 +85,7 @@ void qrGetQ_inplace(
 }
 
 template <typename math_t>
-void qrGetQ(raft::device_resources const& handle,
+void qrGetQ(raft::resources const& handle,
             const math_t* M,
             math_t* Q,
             int n_rows,
@@ -95,7 +97,7 @@ void qrGetQ(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void qrGetQR(raft::device_resources const& handle,
+void qrGetQR(raft::resources const& handle,
              math_t* M,
              math_t* Q,
              math_t* R,
@@ -103,7 +105,7 @@ void qrGetQR(raft::device_resources const& handle,
              int n_cols,
              cudaStream_t stream)
 {
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   int m = n_rows, n = n_cols;
   rmm::device_uvector<math_t> R_full(m * n, stream);
diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh
index 05588bda9c..bbd71a4cf1 100644
--- a/cpp/include/raft/linalg/detail/transpose.cuh
+++ b/cpp/include/raft/linalg/detail/transpose.cuh
@@ -19,7 +19,9 @@
 #include "cublas_wrappers.hpp"
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <rmm/exec_policy.hpp>
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -29,14 +31,14 @@ namespace linalg {
 namespace detail {
 
 template <typename math_t>
-void transpose(raft::device_resources const& handle,
+void transpose(raft::resources const& handle,
                math_t* in,
                math_t* out,
                int n_rows,
                int n_cols,
                cudaStream_t stream)
 {
-  cublasHandle_t cublas_h = handle.get_cublas_handle();
+  cublasHandle_t cublas_h = resource::get_cublas_handle(handle);
   RAFT_CUBLAS_TRY(cublasSetStream(cublas_h, stream));
 
   int out_n_rows = n_cols;
@@ -83,7 +85,7 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
 
 template <typename T, typename IndexType, typename LayoutPolicy, typename AccessorPolicy>
 void transpose_row_major_impl(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::mdspan<T, raft::matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> in,
   raft::mdspan<T, raft::matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> out)
 {
@@ -92,7 +94,7 @@ void transpose_row_major_impl(
   T constexpr kOne  = 1;
   T constexpr kZero = 0;
 
-  CUBLAS_TRY(cublasgeam(handle.get_cublas_handle(),
+  CUBLAS_TRY(cublasgeam(resource::get_cublas_handle(handle),
                         CUBLAS_OP_T,
                         CUBLAS_OP_N,
                         out_n_cols,
@@ -105,12 +107,12 @@ void transpose_row_major_impl(
                         out.stride(0),
                         out.data_handle(),
                         out.stride(0),
-                        handle.get_stream()));
+                        resource::get_cuda_stream(handle)));
 }
 
 template <typename T, typename IndexType, typename LayoutPolicy, typename AccessorPolicy>
 void transpose_col_major_impl(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::mdspan<T, raft::matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> in,
   raft::mdspan<T, raft::matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> out)
 {
@@ -119,7 +121,7 @@ void transpose_col_major_impl(
   T constexpr kOne  = 1;
   T constexpr kZero = 0;
 
-  CUBLAS_TRY(cublasgeam(handle.get_cublas_handle(),
+  CUBLAS_TRY(cublasgeam(resource::get_cublas_handle(handle),
                         CUBLAS_OP_T,
                         CUBLAS_OP_N,
                         out_n_rows,
@@ -132,7 +134,7 @@ void transpose_col_major_impl(
                         out.stride(1),
                         out.data_handle(),
                         out.stride(1),
-                        handle.get_stream()));
+                        resource::get_cuda_stream(handle)));
 }
 };  // end namespace detail
 };  // end namespace linalg
diff --git a/cpp/include/raft/linalg/qr.cuh b/cpp/include/raft/linalg/qr.cuh
index 8e58af63c1..948996d0ac 100644
--- a/cpp/include/raft/linalg/qr.cuh
+++ b/cpp/include/raft/linalg/qr.cuh
@@ -19,6 +19,8 @@
 #pragma once
 
 #include "detail/qr.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace linalg {
@@ -33,7 +35,7 @@ namespace linalg {
  * @param stream cuda stream
  */
 template <typename math_t>
-void qrGetQ(raft::device_resources const& handle,
+void qrGetQ(raft::resources const& handle,
             const math_t* M,
             math_t* Q,
             int n_rows,
@@ -54,7 +56,7 @@ void qrGetQ(raft::device_resources const& handle,
  * @param stream cuda stream
  */
 template <typename math_t>
-void qrGetQR(raft::device_resources const& handle,
+void qrGetQR(raft::resources const& handle,
              math_t* M,
              math_t* Q,
              math_t* R,
@@ -77,13 +79,18 @@ void qrGetQR(raft::device_resources const& handle,
  * @param[out] Q Output raft::device_matrix_view
  */
 template <typename ElementType, typename IndexType>
-void qr_get_q(raft::device_resources const& handle,
+void qr_get_q(raft::resources const& handle,
               raft::device_matrix_view<const ElementType, IndexType, raft::col_major> M,
               raft::device_matrix_view<ElementType, IndexType, raft::col_major> Q)
 {
   RAFT_EXPECTS(Q.size() == M.size(), "Size mismatch between Output and Input");
 
-  qrGetQ(handle, M.data_handle(), Q.data_handle(), M.extent(0), M.extent(1), handle.get_stream());
+  qrGetQ(handle,
+         M.data_handle(),
+         Q.data_handle(),
+         M.extent(0),
+         M.extent(1),
+         resource::get_cuda_stream(handle));
 }
 
 /**
@@ -94,7 +101,7 @@ void qr_get_q(raft::device_resources const& handle,
  * @param[out] R Output raft::device_matrix_view
  */
 template <typename ElementType, typename IndexType>
-void qr_get_qr(raft::device_resources const& handle,
+void qr_get_qr(raft::resources const& handle,
                raft::device_matrix_view<const ElementType, IndexType, raft::col_major> M,
                raft::device_matrix_view<ElementType, IndexType, raft::col_major> Q,
                raft::device_matrix_view<ElementType, IndexType, raft::col_major> R)
@@ -107,7 +114,7 @@ void qr_get_qr(raft::device_resources const& handle,
           R.data_handle(),
           M.extent(0),
           M.extent(1),
-          handle.get_stream());
+          resource::get_cuda_stream(handle));
 }
 
 /** @} */
diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index 2f31cfd722..0fe752347d 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -20,6 +20,7 @@
 
 #include "detail/transpose.cuh"
 #include <raft/core/device_mdarray.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace linalg {
@@ -34,7 +35,7 @@ namespace linalg {
  * @param stream: cuda stream
  */
 template <typename math_t>
-void transpose(raft::device_resources const& handle,
+void transpose(raft::resources const& handle,
                math_t* in,
                math_t* out,
                int n_rows,
@@ -76,7 +77,7 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
  * @param[out] out    Output matirx, storage is pre-allocated by caller.
  */
 template <typename T, typename IndexType, typename LayoutPolicy, typename AccessorPolicy>
-auto transpose(raft::device_resources const& handle,
+auto transpose(raft::resources const& handle,
                raft::mdspan<T, raft::matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> in,
                raft::mdspan<T, raft::matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> out)
   -> std::enable_if_t<std::is_floating_point_v<T>, void>
diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh
index 01d97d496d..1715dcbe81 100644
--- a/cpp/include/raft/random/detail/make_regression.cuh
+++ b/cpp/include/raft/random/detail/make_regression.cuh
@@ -22,7 +22,8 @@
 
 #include <algorithm>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/add.cuh>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/linalg/init.cuh>
@@ -52,7 +53,7 @@ static __global__ void _singular_profile_kernel(DataT* out, IdxT n, DataT tail_s
 
 /* Internal auxiliary function to generate a low-rank matrix */
 template <typename DataT, typename IdxT>
-static void _make_low_rank_matrix(raft::device_resources const& handle,
+static void _make_low_rank_matrix(raft::resources const& handle,
                                   DataT* out,
                                   IdxT n_rows,
                                   IdxT n_cols,
@@ -61,8 +62,7 @@ static void _make_low_rank_matrix(raft::device_resources const& handle,
                                   raft::random::RngState& r,
                                   cudaStream_t stream)
 {
-  cusolverDnHandle_t cusolver_handle = handle.get_cusolver_dn_handle();
-  cublasHandle_t cublas_handle       = handle.get_cublas_handle();
+  cublasHandle_t cublas_handle = resource::get_cublas_handle(handle);
 
   IdxT n = std::min(n_rows, n_cols);
 
@@ -143,7 +143,7 @@ static __global__ void _gather2d_kernel(
 }
 
 template <typename DataT, typename IdxT>
-void make_regression_caller(raft::device_resources const& handle,
+void make_regression_caller(raft::resources const& handle,
                             DataT* out,
                             DataT* values,
                             IdxT n_rows,
@@ -162,8 +162,7 @@ void make_regression_caller(raft::device_resources const& handle,
 {
   n_informative = std::min(n_informative, n_cols);
 
-  cusolverDnHandle_t cusolver_handle = handle.get_cusolver_dn_handle();
-  cublasHandle_t cublas_handle       = handle.get_cublas_handle();
+  cublasHandle_t cublas_handle = resource::get_cublas_handle(handle);
 
   cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);
   raft::random::RngState r(seed, type);
diff --git a/cpp/include/raft/random/detail/multi_variable_gaussian.cuh b/cpp/include/raft/random/detail/multi_variable_gaussian.cuh
index 16f50446ae..68934ac1ff 100644
--- a/cpp/include/raft/random/detail/multi_variable_gaussian.cuh
+++ b/cpp/include/raft/random/detail/multi_variable_gaussian.cuh
@@ -20,7 +20,10 @@
 #include <memory>
 #include <optional>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusolver_dn_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/linalg/detail/cusolver_wrappers.hpp>
 #include <raft/linalg/matrix_vector_op.cuh>
@@ -139,18 +142,16 @@ class multi_variable_gaussian_impl {
   int *info, Lwork, info_h;
   syevjInfo_t syevj_params = NULL;
   curandGenerator_t gen;
-  raft::device_resources const& handle;
+  raft::resources const& handle;
   cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR;
   bool deinitilized      = false;
 
  public:  // functions
   multi_variable_gaussian_impl() = delete;
-  multi_variable_gaussian_impl(raft::device_resources const& handle,
-                               const int dim,
-                               Decomposer method)
+  multi_variable_gaussian_impl(raft::resources const& handle, const int dim, Decomposer method)
     : handle(handle), dim(dim), method(method)
   {
-    auto cusolverHandle = handle.get_cusolver_dn_handle();
+    auto cusolverHandle = resource::get_cusolver_dn_handle(handle);
 
     CURAND_CHECK(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
     CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(gen, 28));  // SEED
@@ -191,9 +192,9 @@ class multi_variable_gaussian_impl {
 
   void give_gaussian(const int nPoints, T* P, T* X, const T* x = 0)
   {
-    auto cusolverHandle = handle.get_cusolver_dn_handle();
-    auto cublasHandle   = handle.get_cublas_handle();
-    auto cudaStream     = handle.get_stream();
+    auto cusolverHandle = resource::get_cusolver_dn_handle(handle);
+    auto cublasHandle   = resource::get_cublas_handle(handle);
+    auto cudaStream     = resource::get_cuda_stream(handle);
     if (method == chol_decomp) {
       // lower part will contains chol_decomp
       RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnpotrf(
@@ -299,7 +300,7 @@ class multi_variable_gaussian_setup_token;
 
 template <typename ValueType>
 multi_variable_gaussian_setup_token<ValueType> build_multi_variable_gaussian_token_impl(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   rmm::mr::device_memory_resource& mem_resource,
   const int dim,
   const multi_variable_gaussian_decomposition_method method);
@@ -315,7 +316,7 @@ template <typename ValueType>
 class multi_variable_gaussian_setup_token {
   template <typename T>
   friend multi_variable_gaussian_setup_token<T> build_multi_variable_gaussian_token_impl(
-    raft::device_resources const& handle,
+    raft::resources const& handle,
     rmm::mr::device_memory_resource& mem_resource,
     const int dim,
     const multi_variable_gaussian_decomposition_method method);
@@ -342,7 +343,7 @@ class multi_variable_gaussian_setup_token {
 
   // Constructor, only for use by friend functions.
   // Hiding this will let us change the implementation in the future.
-  multi_variable_gaussian_setup_token(raft::device_resources const& handle,
+  multi_variable_gaussian_setup_token(raft::resources const& handle,
                                       rmm::mr::device_memory_resource& mem_resource,
                                       const int dim,
                                       const multi_variable_gaussian_decomposition_method method)
@@ -399,14 +400,15 @@ class multi_variable_gaussian_setup_token {
 
  private:
   std::unique_ptr<multi_variable_gaussian_impl<ValueType>> impl_;
-  raft::device_resources const& handle_;
+  raft::resources const& handle_;
   rmm::mr::device_memory_resource& mem_resource_;
   int dim_ = 0;
 
   auto allocate_workspace() const
   {
     const auto num_elements = impl_->get_workspace_size();
-    return rmm::device_uvector<ValueType>{num_elements, handle_.get_stream(), &mem_resource_};
+    return rmm::device_uvector<ValueType>{
+      num_elements, resource::get_cuda_stream(handle_), &mem_resource_};
   }
 
   int dim() const { return dim_; }
@@ -414,7 +416,7 @@ class multi_variable_gaussian_setup_token {
 
 template <typename ValueType>
 multi_variable_gaussian_setup_token<ValueType> build_multi_variable_gaussian_token_impl(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   rmm::mr::device_memory_resource& mem_resource,
   const int dim,
   const multi_variable_gaussian_decomposition_method method)
@@ -434,7 +436,7 @@ void compute_multi_variable_gaussian_impl(
 
 template <typename ValueType>
 void compute_multi_variable_gaussian_impl(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   rmm::mr::device_memory_resource& mem_resource,
   std::optional<raft::device_vector_view<const ValueType, int>> x,
   raft::device_matrix_view<ValueType, int, raft::col_major> P,
@@ -455,7 +457,7 @@ class multi_variable_gaussian : public detail::multi_variable_gaussian_impl<T> {
   // using detail::multi_variable_gaussian_impl<T>::Decomposer::qr;
 
   multi_variable_gaussian() = delete;
-  multi_variable_gaussian(raft::device_resources const& handle,
+  multi_variable_gaussian(raft::resources const& handle,
                           const int dim,
                           typename detail::multi_variable_gaussian_impl<T>::Decomposer method)
     : detail::multi_variable_gaussian_impl<T>{handle, dim, method}
diff --git a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh
index b5e0610405..d00fc29056 100644
--- a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh
+++ b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh
@@ -18,7 +18,8 @@
 
 #include "rmat_rectangular_generator_types.cuh"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng_device.cuh>
 #include <raft/random/rng_state.hpp>
 #include <raft/util/cuda_utils.cuh>
@@ -206,7 +207,7 @@ void rmat_rectangular_gen_caller(IdxT* out,
  * @param[in]  c_scale 2^c_scale represents the number of destination nodes
  */
 template <typename IdxT, typename ProbT>
-void rmat_rectangular_gen_impl(raft::device_resources const& handle,
+void rmat_rectangular_gen_impl(raft::resources const& handle,
                                raft::random::RngState& r,
                                raft::device_vector_view<const ProbT, IdxT> theta,
                                raft::random::detail::rmat_rectangular_gen_output<IdxT> output,
@@ -247,7 +248,7 @@ void rmat_rectangular_gen_impl(raft::device_resources const& handle,
                               r_scale,
                               c_scale,
                               n_edges,
-                              handle.get_stream(),
+                              resource::get_cuda_stream(handle),
                               r);
 }
 
@@ -259,7 +260,7 @@ void rmat_rectangular_gen_impl(raft::device_resources const& handle,
  * `theta` parameter.
  */
 template <typename IdxT, typename ProbT>
-void rmat_rectangular_gen_impl(raft::device_resources const& handle,
+void rmat_rectangular_gen_impl(raft::resources const& handle,
                                raft::random::RngState& r,
                                raft::random::detail::rmat_rectangular_gen_output<IdxT> output,
                                ProbT a,
@@ -286,8 +287,17 @@ void rmat_rectangular_gen_impl(raft::device_resources const& handle,
   IdxT* out_dst_ptr            = out_dst_has_value ? (*out_dst).data_handle() : nullptr;
   const IdxT n_edges           = output.number_of_edges();
 
-  detail::rmat_rectangular_gen_caller(
-    out_ptr, out_src_ptr, out_dst_ptr, a, b, c, r_scale, c_scale, n_edges, handle.get_stream(), r);
+  detail::rmat_rectangular_gen_caller(out_ptr,
+                                      out_src_ptr,
+                                      out_dst_ptr,
+                                      a,
+                                      b,
+                                      c,
+                                      r_scale,
+                                      c_scale,
+                                      n_edges,
+                                      resource::get_cuda_stream(handle),
+                                      r);
 }
 
 }  // end namespace detail
diff --git a/cpp/include/raft/random/detail/rng_impl_deprecated.cuh b/cpp/include/raft/random/detail/rng_impl_deprecated.cuh
index 362c844fb3..8895d22cf0 100644
--- a/cpp/include/raft/random/detail/rng_impl_deprecated.cuh
+++ b/cpp/include/raft/random/detail/rng_impl_deprecated.cuh
@@ -23,7 +23,7 @@
 #include "rng_device.cuh"
 
 #include <curand_kernel.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng_state.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/detail/cub_wrappers.cuh>
@@ -259,7 +259,7 @@ class RngImpl {
 
   template <typename DataT, typename WeightsT, typename IdxT = int>
   METHOD_DEPR(sampleWithoutReplacement)
-  void sampleWithoutReplacement(raft::device_resources const& handle,
+  void sampleWithoutReplacement(raft::resources const& handle,
                                 DataT* out,
                                 IdxT* outIdx,
                                 const DataT* in,
diff --git a/cpp/include/raft/random/make_blobs.cuh b/cpp/include/raft/random/make_blobs.cuh
index 7aa0362f6d..079ab43b74 100644
--- a/cpp/include/raft/random/make_blobs.cuh
+++ b/cpp/include/raft/random/make_blobs.cuh
@@ -22,6 +22,8 @@
 #include "detail/make_blobs.cuh"
 #include <optional>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::random {
 
@@ -129,7 +131,7 @@ void make_blobs(DataT* out,
  */
 template <typename DataT, typename IdxT, typename layout>
 void make_blobs(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<DataT, IdxT, layout> out,
   raft::device_vector_view<IdxT, IdxT> labels,
   IdxT n_clusters                                                        = 5,
@@ -167,7 +169,7 @@ void make_blobs(
                             (IdxT)out.extent(0),
                             (IdxT)out.extent(1),
                             n_clusters,
-                            handle.get_stream(),
+                            resource::get_cuda_stream(handle),
                             row_major,
                             prm_centers,
                             prm_cluster_std,
diff --git a/cpp/include/raft/random/make_regression.cuh b/cpp/include/raft/random/make_regression.cuh
index f4a7e82308..0aa9cc4daa 100644
--- a/cpp/include/raft/random/make_regression.cuh
+++ b/cpp/include/raft/random/make_regression.cuh
@@ -26,6 +26,8 @@
 #include <algorithm>
 #include <optional>
 #include <raft/core/mdarray.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 #include "detail/make_regression.cuh"
 
@@ -67,7 +69,7 @@ namespace raft::random {
  * @param[in]   type            Random generator type
  */
 template <typename DataT, typename IdxT>
-void make_regression(raft::device_resources const& handle,
+void make_regression(raft::resources const& handle,
                      DataT* out,
                      DataT* values,
                      IdxT n_rows,
@@ -138,7 +140,7 @@ void make_regression(raft::device_resources const& handle,
  * @param[in]   type            Random generator type
  */
 template <typename DataT, typename IdxT>
-void make_regression(raft::device_resources const& handle,
+void make_regression(raft::resources const& handle,
                      raft::device_matrix_view<DataT, IdxT, raft::row_major> out,
                      raft::device_matrix_view<DataT, IdxT, raft::row_major> values,
                      IdxT n_informative,
@@ -170,7 +172,7 @@ void make_regression(raft::device_resources const& handle,
                                  n_samples,
                                  n_features,
                                  n_informative,
-                                 handle.get_stream(),
+                                 resource::get_cuda_stream(handle),
                                  coef_ptr,
                                  n_targets,
                                  bias,
diff --git a/cpp/include/raft/random/multi_variable_gaussian.cuh b/cpp/include/raft/random/multi_variable_gaussian.cuh
index 91a7695f2c..eada1c9521 100644
--- a/cpp/include/raft/random/multi_variable_gaussian.cuh
+++ b/cpp/include/raft/random/multi_variable_gaussian.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include "detail/multi_variable_gaussian.cuh"
+#include <raft/core/resources.hpp>
 #include <raft/random/random_types.hpp>
 
 namespace raft::random {
@@ -30,7 +31,7 @@ namespace raft::random {
  */
 
 template <typename ValueType>
-void multi_variable_gaussian(raft::device_resources const& handle,
+void multi_variable_gaussian(raft::resources const& handle,
                              rmm::mr::device_memory_resource& mem_resource,
                              std::optional<raft::device_vector_view<const ValueType, int>> x,
                              raft::device_matrix_view<ValueType, int, raft::col_major> P,
@@ -41,7 +42,7 @@ void multi_variable_gaussian(raft::device_resources const& handle,
 }
 
 template <typename ValueType>
-void multi_variable_gaussian(raft::device_resources const& handle,
+void multi_variable_gaussian(raft::resources const& handle,
                              std::optional<raft::device_vector_view<const ValueType, int>> x,
                              raft::device_matrix_view<ValueType, int, raft::col_major> P,
                              raft::device_matrix_view<ValueType, int, raft::col_major> X,
diff --git a/cpp/include/raft/random/permute.cuh b/cpp/include/raft/random/permute.cuh
index 16de1d676d..d349b68add 100644
--- a/cpp/include/raft/random/permute.cuh
+++ b/cpp/include/raft/random/permute.cuh
@@ -23,7 +23,8 @@
 
 #include <optional>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <type_traits>
 
 namespace raft::random {
@@ -94,7 +95,7 @@ using perms_out_view_t = typename perms_out_view<T, InputOutputValueType, IdxTyp
  *   then we recommend Knuth Shuffle.
  */
 template <typename InputOutputValueType, typename IntType, typename IdxType, typename Layout>
-void permute(raft::device_resources const& handle,
+void permute(raft::resources const& handle,
              raft::device_matrix_view<const InputOutputValueType, IdxType, Layout> in,
              std::optional<raft::device_vector_view<IntType, IdxType>> permsOut,
              std::optional<raft::device_matrix_view<InputOutputValueType, IdxType, Layout>> out)
@@ -127,8 +128,13 @@ void permute(raft::device_resources const& handle,
   if (permsOut_ptr != nullptr || out_ptr != nullptr) {
     const IdxType N = in.extent(0);
     const IdxType D = in.extent(1);
-    detail::permute<InputOutputValueType, IntType, IdxType>(
-      permsOut_ptr, out_ptr, in.data_handle(), D, N, is_row_major, handle.get_stream());
+    detail::permute<InputOutputValueType, IntType, IdxType>(permsOut_ptr,
+                                                            out_ptr,
+                                                            in.data_handle(),
+                                                            D,
+                                                            N,
+                                                            is_row_major,
+                                                            resource::get_cuda_stream(handle));
   }
 }
 
@@ -141,7 +147,7 @@ template <typename InputOutputValueType,
           typename Layout,
           typename PermsOutType,
           typename OutType>
-void permute(raft::device_resources const& handle,
+void permute(raft::resources const& handle,
              raft::device_matrix_view<const InputOutputValueType, IdxType, Layout> in,
              PermsOutType&& permsOut,
              OutType&& out)
diff --git a/cpp/include/raft/random/rmat_rectangular_generator.cuh b/cpp/include/raft/random/rmat_rectangular_generator.cuh
index d578794d31..90cd9baf81 100644
--- a/cpp/include/raft/random/rmat_rectangular_generator.cuh
+++ b/cpp/include/raft/random/rmat_rectangular_generator.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include "detail/rmat_rectangular_generator.cuh"
+#include <raft/core/resources.hpp>
 
 namespace raft::random {
 
@@ -78,7 +79,7 @@ namespace raft::random {
  */
 template <typename IdxT, typename ProbT>
 void rmat_rectangular_gen(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::random::RngState& r,
   raft::device_vector_view<const ProbT, IdxT> theta,
   raft::device_mdspan<IdxT, raft::extents<IdxT, raft::dynamic_extent, 2>, raft::row_major> out,
@@ -102,7 +103,7 @@ void rmat_rectangular_gen(
  * @pre `out_src.extent(0) == out_dst.extent(0)` is `true`
  */
 template <typename IdxT, typename ProbT>
-void rmat_rectangular_gen(raft::device_resources const& handle,
+void rmat_rectangular_gen(raft::resources const& handle,
                           raft::random::RngState& r,
                           raft::device_vector_view<const ProbT, IdxT> theta,
                           raft::device_vector_view<IdxT, IdxT> out_src,
@@ -125,7 +126,7 @@ void rmat_rectangular_gen(raft::device_resources const& handle,
  */
 template <typename IdxT, typename ProbT>
 void rmat_rectangular_gen(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::random::RngState& r,
   raft::device_vector_view<const ProbT, IdxT> theta,
   raft::device_mdspan<IdxT, raft::extents<IdxT, raft::dynamic_extent, 2>, raft::row_major> out,
@@ -152,7 +153,7 @@ void rmat_rectangular_gen(
  */
 template <typename IdxT, typename ProbT>
 void rmat_rectangular_gen(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::random::RngState& r,
   raft::device_mdspan<IdxT, raft::extents<IdxT, raft::dynamic_extent, 2>, raft::row_major> out,
   raft::device_vector_view<IdxT, IdxT> out_src,
@@ -179,7 +180,7 @@ void rmat_rectangular_gen(
  * @pre `out_src.extent(0) == out_dst.extent(0)` is `true`
  */
 template <typename IdxT, typename ProbT>
-void rmat_rectangular_gen(raft::device_resources const& handle,
+void rmat_rectangular_gen(raft::resources const& handle,
                           raft::random::RngState& r,
                           raft::device_vector_view<IdxT, IdxT> out_src,
                           raft::device_vector_view<IdxT, IdxT> out_dst,
@@ -204,7 +205,7 @@ void rmat_rectangular_gen(raft::device_resources const& handle,
  */
 template <typename IdxT, typename ProbT>
 void rmat_rectangular_gen(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::random::RngState& r,
   raft::device_mdspan<IdxT, raft::extents<IdxT, raft::dynamic_extent, 2>, raft::row_major> out,
   ProbT a,
diff --git a/cpp/include/raft/random/rng.cuh b/cpp/include/raft/random/rng.cuh
index d03975d0db..c3b44a7577 100644
--- a/cpp/include/raft/random/rng.cuh
+++ b/cpp/include/raft/random/rng.cuh
@@ -22,7 +22,8 @@
 #include <cassert>
 #include <optional>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <type_traits>
 #include <variant>
 
@@ -41,13 +42,14 @@ namespace raft::random {
  * @param[in] end end of the range
  */
 template <typename OutputValueType, typename IndexType>
-void uniform(raft::device_resources const& handle,
+void uniform(raft::resources const& handle,
              RngState& rng_state,
              raft::device_vector_view<OutputValueType, IndexType> out,
              OutputValueType start,
              OutputValueType end)
 {
-  detail::uniform(rng_state, out.data_handle(), out.extent(0), start, end, handle.get_stream());
+  detail::uniform(
+    rng_state, out.data_handle(), out.extent(0), start, end, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -63,14 +65,14 @@ void uniform(raft::device_resources const& handle,
  * @param[in] end end of the range
  */
 template <typename OutType, typename LenType = int>
-void uniform(raft::device_resources const& handle,
+void uniform(raft::resources const& handle,
              RngState& rng_state,
              OutType* ptr,
              LenType len,
              OutType start,
              OutType end)
 {
-  detail::uniform(rng_state, ptr, len, start, end, handle.get_stream());
+  detail::uniform(rng_state, ptr, len, start, end, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -86,7 +88,7 @@ void uniform(raft::device_resources const& handle,
  * @param[in] end end of the range
  */
 template <typename OutputValueType, typename IndexType>
-void uniformInt(raft::device_resources const& handle,
+void uniformInt(raft::resources const& handle,
                 RngState& rng_state,
                 raft::device_vector_view<OutputValueType, IndexType> out,
                 OutputValueType start,
@@ -98,7 +100,8 @@ void uniformInt(raft::device_resources const& handle,
     "so that we can write to it.");
   static_assert(std::is_integral<OutputValueType>::value,
                 "uniformInt: The elements of the output vector must have integral type.");
-  detail::uniformInt(rng_state, out.data_handle(), out.extent(0), start, end, handle.get_stream());
+  detail::uniformInt(
+    rng_state, out.data_handle(), out.extent(0), start, end, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -114,14 +117,14 @@ void uniformInt(raft::device_resources const& handle,
  * @param[in] end end of the range
  */
 template <typename OutType, typename LenType = int>
-void uniformInt(raft::device_resources const& handle,
+void uniformInt(raft::resources const& handle,
                 RngState& rng_state,
                 OutType* ptr,
                 LenType len,
                 OutType start,
                 OutType end)
 {
-  detail::uniformInt(rng_state, ptr, len, start, end, handle.get_stream());
+  detail::uniformInt(rng_state, ptr, len, start, end, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -138,13 +141,14 @@ void uniformInt(raft::device_resources const& handle,
  * @param[in] sigma std-dev of the distribution
  */
 template <typename OutputValueType, typename IndexType>
-void normal(raft::device_resources const& handle,
+void normal(raft::resources const& handle,
             RngState& rng_state,
             raft::device_vector_view<OutputValueType, IndexType> out,
             OutputValueType mu,
             OutputValueType sigma)
 {
-  detail::normal(rng_state, out.data_handle(), out.extent(0), mu, sigma, handle.get_stream());
+  detail::normal(
+    rng_state, out.data_handle(), out.extent(0), mu, sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -160,14 +164,14 @@ void normal(raft::device_resources const& handle,
  * @param[in] sigma std-dev of the distribution
  */
 template <typename OutType, typename LenType = int>
-void normal(raft::device_resources const& handle,
+void normal(raft::resources const& handle,
             RngState& rng_state,
             OutType* ptr,
             LenType len,
             OutType mu,
             OutType sigma)
 {
-  detail::normal(rng_state, ptr, len, mu, sigma, handle.get_stream());
+  detail::normal(rng_state, ptr, len, mu, sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -183,7 +187,7 @@ void normal(raft::device_resources const& handle,
  * @param[in] sigma standard deviation of the distribution
  */
 template <typename OutputValueType, typename IndexType>
-void normalInt(raft::device_resources const& handle,
+void normalInt(raft::resources const& handle,
                RngState& rng_state,
                raft::device_vector_view<OutputValueType, IndexType> out,
                OutputValueType mu,
@@ -196,7 +200,8 @@ void normalInt(raft::device_resources const& handle,
   static_assert(std::is_integral<OutputValueType>::value,
                 "normalInt: The output vector's value type must be an integer.");
 
-  detail::normalInt(rng_state, out.data_handle(), out.extent(0), mu, sigma, handle.get_stream());
+  detail::normalInt(
+    rng_state, out.data_handle(), out.extent(0), mu, sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -212,14 +217,14 @@ void normalInt(raft::device_resources const& handle,
  * @param[in] sigma std-dev of the distribution
  */
 template <typename IntType, typename LenType = int>
-void normalInt(raft::device_resources const& handle,
+void normalInt(raft::resources const& handle,
                RngState& rng_state,
                IntType* ptr,
                LenType len,
                IntType mu,
                IntType sigma)
 {
-  detail::normalInt(rng_state, ptr, len, mu, sigma, handle.get_stream());
+  detail::normalInt(rng_state, ptr, len, mu, sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -244,7 +249,7 @@ void normalInt(raft::device_resources const& handle,
  */
 template <typename OutputValueType, typename IndexType>
 void normalTable(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   RngState& rng_state,
   raft::device_vector_view<const OutputValueType, IndexType> mu_vec,
   std::variant<raft::device_vector_view<const OutputValueType, IndexType>, OutputValueType> sigma,
@@ -283,7 +288,7 @@ void normalTable(
                       mu_vec.data_handle(),
                       sigma_vec_ptr,
                       sigma_value,
-                      handle.get_stream());
+                      resource::get_cuda_stream(handle));
 }
 
 /**
@@ -307,7 +312,7 @@ void normalTable(
  * @param[in] sigma scalar sigma to be used if 'sigma_vec' is nullptr
  */
 template <typename OutType, typename LenType = int>
-void normalTable(raft::device_resources const& handle,
+void normalTable(raft::resources const& handle,
                  RngState& rng_state,
                  OutType* ptr,
                  LenType n_rows,
@@ -317,7 +322,7 @@ void normalTable(raft::device_resources const& handle,
                  OutType sigma)
 {
   detail::normalTable(
-    rng_state, ptr, n_rows, n_cols, mu_vec, sigma_vec, sigma, handle.get_stream());
+    rng_state, ptr, n_rows, n_cols, mu_vec, sigma_vec, sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -332,12 +337,12 @@ void normalTable(raft::device_resources const& handle,
  * @param[out] out the output vector
  */
 template <typename OutputValueType, typename IndexType>
-void fill(raft::device_resources const& handle,
+void fill(raft::resources const& handle,
           RngState& rng_state,
           OutputValueType val,
           raft::device_vector_view<OutputValueType, IndexType> out)
 {
-  detail::fill(rng_state, out.data_handle(), out.extent(0), val, handle.get_stream());
+  detail::fill(rng_state, out.data_handle(), out.extent(0), val, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -353,9 +358,9 @@ void fill(raft::device_resources const& handle,
  */
 template <typename OutType, typename LenType = int>
 void fill(
-  raft::device_resources const& handle, RngState& rng_state, OutType* ptr, LenType len, OutType val)
+  raft::resources const& handle, RngState& rng_state, OutType* ptr, LenType len, OutType val)
 {
-  detail::fill(rng_state, ptr, len, val, handle.get_stream());
+  detail::fill(rng_state, ptr, len, val, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -372,12 +377,13 @@ void fill(
  * @param[in] prob coin-toss probability for heads
  */
 template <typename OutputValueType, typename IndexType, typename Type>
-void bernoulli(raft::device_resources const& handle,
+void bernoulli(raft::resources const& handle,
                RngState& rng_state,
                raft::device_vector_view<OutputValueType, IndexType> out,
                Type prob)
 {
-  detail::bernoulli(rng_state, out.data_handle(), out.extent(0), prob, handle.get_stream());
+  detail::bernoulli(
+    rng_state, out.data_handle(), out.extent(0), prob, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -395,9 +401,9 @@ void bernoulli(raft::device_resources const& handle,
  */
 template <typename Type, typename OutType = bool, typename LenType = int>
 void bernoulli(
-  raft::device_resources const& handle, RngState& rng_state, OutType* ptr, LenType len, Type prob)
+  raft::resources const& handle, RngState& rng_state, OutType* ptr, LenType len, Type prob)
 {
-  detail::bernoulli(rng_state, ptr, len, prob, handle.get_stream());
+  detail::bernoulli(rng_state, ptr, len, prob, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -413,14 +419,14 @@ void bernoulli(
  * @param[in] scale scaling factor
  */
 template <typename OutputValueType, typename IndexType>
-void scaled_bernoulli(raft::device_resources const& handle,
+void scaled_bernoulli(raft::resources const& handle,
                       RngState& rng_state,
                       raft::device_vector_view<OutputValueType, IndexType> out,
                       OutputValueType prob,
                       OutputValueType scale)
 {
   detail::scaled_bernoulli(
-    rng_state, out.data_handle(), out.extent(0), prob, scale, handle.get_stream());
+    rng_state, out.data_handle(), out.extent(0), prob, scale, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -436,14 +442,14 @@ void scaled_bernoulli(raft::device_resources const& handle,
  * @param[in] scale scaling factor
  */
 template <typename OutType, typename LenType = int>
-void scaled_bernoulli(raft::device_resources const& handle,
+void scaled_bernoulli(raft::resources const& handle,
                       RngState& rng_state,
                       OutType* ptr,
                       LenType len,
                       OutType prob,
                       OutType scale)
 {
-  detail::scaled_bernoulli(rng_state, ptr, len, prob, scale, handle.get_stream());
+  detail::scaled_bernoulli(rng_state, ptr, len, prob, scale, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -460,13 +466,14 @@ void scaled_bernoulli(raft::device_resources const& handle,
  * @note https://en.wikipedia.org/wiki/Gumbel_distribution
  */
 template <typename OutputValueType, typename IndexType = int>
-void gumbel(raft::device_resources const& handle,
+void gumbel(raft::resources const& handle,
             RngState& rng_state,
             raft::device_vector_view<OutputValueType, IndexType> out,
             OutputValueType mu,
             OutputValueType beta)
 {
-  detail::gumbel(rng_state, out.data_handle(), out.extent(0), mu, beta, handle.get_stream());
+  detail::gumbel(
+    rng_state, out.data_handle(), out.extent(0), mu, beta, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -483,14 +490,14 @@ void gumbel(raft::device_resources const& handle,
  * @note https://en.wikipedia.org/wiki/Gumbel_distribution
  */
 template <typename OutType, typename LenType = int>
-void gumbel(raft::device_resources const& handle,
+void gumbel(raft::resources const& handle,
             RngState& rng_state,
             OutType* ptr,
             LenType len,
             OutType mu,
             OutType beta)
 {
-  detail::gumbel(rng_state, ptr, len, mu, beta, handle.get_stream());
+  detail::gumbel(rng_state, ptr, len, mu, beta, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -506,13 +513,14 @@ void gumbel(raft::device_resources const& handle,
  * @param[in] sigma standard deviation of the distribution
  */
 template <typename OutputValueType, typename IndexType>
-void lognormal(raft::device_resources const& handle,
+void lognormal(raft::resources const& handle,
                RngState& rng_state,
                raft::device_vector_view<OutputValueType, IndexType> out,
                OutputValueType mu,
                OutputValueType sigma)
 {
-  detail::lognormal(rng_state, out.data_handle(), out.extent(0), mu, sigma, handle.get_stream());
+  detail::lognormal(
+    rng_state, out.data_handle(), out.extent(0), mu, sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -528,14 +536,14 @@ void lognormal(raft::device_resources const& handle,
  * @param[in] sigma standard deviation of the distribution
  */
 template <typename OutType, typename LenType = int>
-void lognormal(raft::device_resources const& handle,
+void lognormal(raft::resources const& handle,
                RngState& rng_state,
                OutType* ptr,
                LenType len,
                OutType mu,
                OutType sigma)
 {
-  detail::lognormal(rng_state, ptr, len, mu, sigma, handle.get_stream());
+  detail::lognormal(rng_state, ptr, len, mu, sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -551,13 +559,14 @@ void lognormal(raft::device_resources const& handle,
  * @param[in] scale scale value
  */
 template <typename OutputValueType, typename IndexType = int>
-void logistic(raft::device_resources const& handle,
+void logistic(raft::resources const& handle,
               RngState& rng_state,
               raft::device_vector_view<OutputValueType, IndexType> out,
               OutputValueType mu,
               OutputValueType scale)
 {
-  detail::logistic(rng_state, out.data_handle(), out.extent(0), mu, scale, handle.get_stream());
+  detail::logistic(
+    rng_state, out.data_handle(), out.extent(0), mu, scale, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -573,14 +582,14 @@ void logistic(raft::device_resources const& handle,
  * @param[in] scale scale value
  */
 template <typename OutType, typename LenType = int>
-void logistic(raft::device_resources const& handle,
+void logistic(raft::resources const& handle,
               RngState& rng_state,
               OutType* ptr,
               LenType len,
               OutType mu,
               OutType scale)
 {
-  detail::logistic(rng_state, ptr, len, mu, scale, handle.get_stream());
+  detail::logistic(rng_state, ptr, len, mu, scale, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -595,12 +604,13 @@ void logistic(raft::device_resources const& handle,
  * @param[in] lambda the exponential distribution's lambda parameter
  */
 template <typename OutputValueType, typename IndexType>
-void exponential(raft::device_resources const& handle,
+void exponential(raft::resources const& handle,
                  RngState& rng_state,
                  raft::device_vector_view<OutputValueType, IndexType> out,
                  OutputValueType lambda)
 {
-  detail::exponential(rng_state, out.data_handle(), out.extent(0), lambda, handle.get_stream());
+  detail::exponential(
+    rng_state, out.data_handle(), out.extent(0), lambda, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -615,13 +625,10 @@ void exponential(raft::device_resources const& handle,
  * @param[in] lambda the exponential distribution's lambda parameter
  */
 template <typename OutType, typename LenType = int>
-void exponential(raft::device_resources const& handle,
-                 RngState& rng_state,
-                 OutType* ptr,
-                 LenType len,
-                 OutType lambda)
+void exponential(
+  raft::resources const& handle, RngState& rng_state, OutType* ptr, LenType len, OutType lambda)
 {
-  detail::exponential(rng_state, ptr, len, lambda, handle.get_stream());
+  detail::exponential(rng_state, ptr, len, lambda, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -636,12 +643,13 @@ void exponential(raft::device_resources const& handle,
  * @param[in] sigma the distribution's sigma parameter
  */
 template <typename OutputValueType, typename IndexType>
-void rayleigh(raft::device_resources const& handle,
+void rayleigh(raft::resources const& handle,
               RngState& rng_state,
               raft::device_vector_view<OutputValueType, IndexType> out,
               OutputValueType sigma)
 {
-  detail::rayleigh(rng_state, out.data_handle(), out.extent(0), sigma, handle.get_stream());
+  detail::rayleigh(
+    rng_state, out.data_handle(), out.extent(0), sigma, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -656,15 +664,11 @@ void rayleigh(raft::device_resources const& handle,
  * @param[in] sigma the distribution's sigma parameter
  */
 template <typename OutType, typename LenType = int>
-void rayleigh(raft::device_resources const& handle,
-              RngState& rng_state,
-              OutType* ptr,
-              LenType len,
-              OutType sigma)
+void rayleigh(
+  raft::resources const& handle, RngState& rng_state, OutType* ptr, LenType len, OutType sigma)
 {
-  detail::rayleigh(rng_state, ptr, len, sigma, handle.get_stream());
+  detail::rayleigh(rng_state, ptr, len, sigma, resource::get_cuda_stream(handle));
 }
-
 /**
  * @brief Generate laplace distributed random numbers
  *
@@ -678,13 +682,14 @@ void rayleigh(raft::device_resources const& handle,
  * @param[in] scale the scale
  */
 template <typename OutputValueType, typename IndexType>
-void laplace(raft::device_resources const& handle,
+void laplace(raft::resources const& handle,
              RngState& rng_state,
              raft::device_vector_view<OutputValueType, IndexType> out,
              OutputValueType mu,
              OutputValueType scale)
 {
-  detail::laplace(rng_state, out.data_handle(), out.extent(0), mu, scale, handle.get_stream());
+  detail::laplace(
+    rng_state, out.data_handle(), out.extent(0), mu, scale, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -700,14 +705,14 @@ void laplace(raft::device_resources const& handle,
  * @param[in] scale the scale
  */
 template <typename OutType, typename LenType = int>
-void laplace(raft::device_resources const& handle,
+void laplace(raft::resources const& handle,
              RngState& rng_state,
              OutType* ptr,
              LenType len,
              OutType mu,
              OutType scale)
 {
-  detail::laplace(rng_state, ptr, len, mu, scale, handle.get_stream());
+  detail::laplace(rng_state, ptr, len, mu, scale, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -716,10 +721,10 @@ void laplace(raft::device_resources const& handle,
  * Usage example:
  * @code{.cpp}
  *  #include <raft/core/device_mdarray.hpp>
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/random/rng.cuh>
  *
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  raft::random::RngState rng(seed);
  *  auto indices = raft::make_device_vector<int>(handle, n_samples);
@@ -737,7 +742,7 @@ void laplace(raft::device_resources const& handle,
  */
 template <typename OutType, typename WeightType, typename IndexType>
 std::enable_if_t<std::is_integral_v<OutType>> discrete(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   RngState& rng_state,
   raft::device_vector_view<OutType, IndexType> out,
   raft::device_vector_view<const WeightType, IndexType> weights)
@@ -747,7 +752,7 @@ std::enable_if_t<std::is_integral_v<OutType>> discrete(
                    weights.data_handle(),
                    out.extent(0),
                    weights.extent(0),
-                   handle.get_stream());
+                   resource::get_cuda_stream(handle));
 }
 
 /**
@@ -770,7 +775,7 @@ std::enable_if_t<std::is_integral_v<OutType>> discrete(
  * @param[in] len input array length
  */
 template <typename DataT, typename WeightsT, typename IdxT = int>
-void sampleWithoutReplacement(raft::device_resources const& handle,
+void sampleWithoutReplacement(raft::resources const& handle,
                               RngState& rng_state,
                               DataT* out,
                               IdxT* outIdx,
@@ -780,7 +785,7 @@ void sampleWithoutReplacement(raft::device_resources const& handle,
                               IdxT len)
 {
   detail::sampleWithoutReplacement(
-    rng_state, out, outIdx, in, wts, sampledLen, len, handle.get_stream());
+    rng_state, out, outIdx, in, wts, sampledLen, len, resource::get_cuda_stream(handle));
 }
 
 /**
@@ -1106,7 +1111,7 @@ class DEPR Rng : public detail::RngImpl {
    * @param stream cuda stream
    */
   template <typename DataT, typename WeightsT, typename IdxT = int>
-  void sampleWithoutReplacement(raft::device_resources const& handle,
+  void sampleWithoutReplacement(raft::resources const& handle,
                                 DataT* out,
                                 IdxT* outIdx,
                                 const DataT* in,
diff --git a/cpp/include/raft/random/sample_without_replacement.cuh b/cpp/include/raft/random/sample_without_replacement.cuh
index be8bda8cd3..b074f68af6 100644
--- a/cpp/include/raft/random/sample_without_replacement.cuh
+++ b/cpp/include/raft/random/sample_without_replacement.cuh
@@ -21,7 +21,8 @@
 #include <cassert>
 #include <optional>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <type_traits>
 #include <variant>
 
@@ -93,7 +94,7 @@ using weight_t = typename weight_alias<T>::type;
  *   equals the number of inputs `in.extent(0)`.
  */
 template <typename DataT, typename IdxT, typename WeightsVectorType, class OutIndexVectorType>
-void sample_without_replacement(raft::device_resources const& handle,
+void sample_without_replacement(raft::resources const& handle,
                                 RngState& rng_state,
                                 raft::device_vector_view<const DataT, IdxT> in,
                                 WeightsVectorType&& weights_opt,
@@ -144,7 +145,7 @@ void sample_without_replacement(raft::device_resources const& handle,
                                    wts_ptr,
                                    sampledLen,
                                    len,
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 }
 
 /**
diff --git a/cpp/test/core/temporary_device_buffer.cu b/cpp/test/core/temporary_device_buffer.cu
index 52a2ec4c9b..cc8af24f10 100644
--- a/cpp/test/core/temporary_device_buffer.cu
+++ b/cpp/test/core/temporary_device_buffer.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 
+#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/temporary_device_buffer.hpp>
 
diff --git a/cpp/test/distance/dist_adj.cu b/cpp/test/distance/dist_adj.cu
index bb63cc9be3..413e548532 100644
--- a/cpp/test/distance/dist_adj.cu
+++ b/cpp/test/distance/dist_adj.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/device_resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu
index 17955abb34..6f5800dd8f 100644
--- a/cpp/test/linalg/transpose.cu
+++ b/cpp/test/linalg/transpose.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 
+#include <raft/core/device_resources.hpp>
 #include <raft/linalg/transpose.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
diff --git a/cpp/test/matrix/columnSort.cu b/cpp/test/matrix/columnSort.cu
index 2292772b1a..9a65918f8f 100644
--- a/cpp/test/matrix/columnSort.cu
+++ b/cpp/test/matrix/columnSort.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <numeric>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/matrix/col_wise_sort.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
diff --git a/cpp/test/random/make_blobs.cu b/cpp/test/random/make_blobs.cu
index c2dbc5dc1c..0565635e3b 100644
--- a/cpp/test/random/make_blobs.cu
+++ b/cpp/test/random/make_blobs.cu
@@ -18,6 +18,8 @@
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
 #include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_resources.hpp>
+
 #include <raft/random/make_blobs.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
diff --git a/cpp/test/random/make_regression.cu b/cpp/test/random/make_regression.cu
index 7508b57bdd..74aa00171b 100644
--- a/cpp/test/random/make_regression.cu
+++ b/cpp/test/random/make_regression.cu
@@ -20,8 +20,10 @@
 #include <thrust/device_vector.h>
 
 #include "../test_utils.cuh"
+#include <raft/core/device_resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/linalg/subtract.cuh>
+
 #include <raft/linalg/transpose.cuh>
 #include <raft/random/make_regression.cuh>
 #include <raft/util/cuda_utils.cuh>
diff --git a/cpp/test/random/multi_variable_gaussian.cu b/cpp/test/random/multi_variable_gaussian.cu
index 1aa8b6a555..a27dffc7bf 100644
--- a/cpp/test/random/multi_variable_gaussian.cu
+++ b/cpp/test/random/multi_variable_gaussian.cu
@@ -18,8 +18,10 @@
 #include <cmath>
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/device_resources.hpp>
 #include <raft/random/multi_variable_gaussian.cuh>
 #include <raft/util/cudart_utils.hpp>
+
 #include <random>
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/test/random/permute.cu b/cpp/test/random/permute.cu
index d5fcca270e..2c5ddf9d5a 100644
--- a/cpp/test/random/permute.cu
+++ b/cpp/test/random/permute.cu
@@ -16,8 +16,10 @@
 
 #include "../test_utils.cuh"
 #include <algorithm>
+#include <raft/core/device_resources.hpp>
 #include <raft/random/permute.cuh>
 #include <raft/random/rng.cuh>
+
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <vector>
diff --git a/cpp/test/random/rmat_rectangular_generator.cu b/cpp/test/random/rmat_rectangular_generator.cu
index aae3898389..fd9a8ec732 100644
--- a/cpp/test/random/rmat_rectangular_generator.cu
+++ b/cpp/test/random/rmat_rectangular_generator.cu
@@ -21,8 +21,10 @@
 
 #include "../test_utils.cuh"
 
+#include <raft/core/device_resources.hpp>
 #include <raft/random/rmat_rectangular_generator.cuh>
 #include <raft/random/rng.cuh>
+
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index d3b8e44b05..92f79b1fa0 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -20,9 +20,11 @@
 #include "../test_utils.cuh"
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
+#include <raft/core/device_resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/mean.cuh>
 #include <raft/stats/stddev.cuh>
+
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
diff --git a/cpp/test/random/rng_discrete.cu b/cpp/test/random/rng_discrete.cu
index 741f7c65e0..b9b283b87d 100644
--- a/cpp/test/random/rng_discrete.cu
+++ b/cpp/test/random/rng_discrete.cu
@@ -18,9 +18,11 @@
 #include <algorithm>
 #include <cmath>
 #include <gtest/gtest.h>
+#include <raft/core/device_resources.hpp>
 #include <raft/linalg/add.cuh>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/random/rng.cuh>
+
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <vector>
diff --git a/cpp/test/random/rng_int.cu b/cpp/test/random/rng_int.cu
index 83300b3ecc..8208b04489 100644
--- a/cpp/test/random/rng_int.cu
+++ b/cpp/test/random/rng_int.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
+#include <raft/core/device_resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
diff --git a/cpp/test/random/sample_without_replacement.cu b/cpp/test/random/sample_without_replacement.cu
index ae5a58da3d..dcad32ce8a 100644
--- a/cpp/test/random/sample_without_replacement.cu
+++ b/cpp/test/random/sample_without_replacement.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/device_resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/random/sample_without_replacement.cuh>
 #include <raft/util/cuda_utils.cuh>
diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu
index 9ad7998180..c6c3dd48ca 100644
--- a/cpp/test/stats/histogram.cu
+++ b/cpp/test/stats/histogram.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/device_resources.hpp>
 #include <raft/core/interruptible.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/histogram.cuh>
diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu
index 8b58f9692a..e0dc77520d 100644
--- a/cpp/test/stats/minmax.cu
+++ b/cpp/test/stats/minmax.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <limits>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/minmax.cuh>
 #include <raft/util/cuda_utils.cuh>

From 3978b32524048f0e605963076d7f0b7914b7b5e1 Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Fri, 5 May 2023 15:19:46 -0400
Subject: [PATCH 42/78] Support CUDA 12.0 for pip wheels (#1489)

Authors:
  - Divye Gala (https://github.com/divyegala)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/raft/pull/1489
---
 .github/workflows/build.yaml                    |  8 ++++----
 .github/workflows/pr.yaml                       | 17 +++++++----------
 .github/workflows/test.yaml                     |  6 ++----
 ci/release/apply_wheel_modifications.sh         |  5 +++++
 .../environments/all_cuda-118_arch-x86_64.yaml  |  2 +-
 dependencies.yaml                               |  2 +-
 python/pylibraft/pyproject.toml                 |  4 ++--
 7 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 00a3aac95a..80e0c8b216 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -66,7 +66,7 @@ jobs:
       run_script: "ci/build_docs.sh"
   wheel-build-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
   wheel-publish-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@cuda-120-pip
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-publish-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -100,7 +100,7 @@ jobs:
   wheel-publish-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@cuda-120-pip
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 4c4c545a78..fcb155d651 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -67,7 +67,7 @@ jobs:
   wheel-build-pylibraft:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -76,34 +76,31 @@ jobs:
   wheel-tests-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
     with:
       build_type: pull-request
       package-name: pylibraft
-      test-before-amd64: "pip install cupy-cuda11x"
-      # On arm also need to install cupy from the specific webpage.
-      test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64"
       test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
       test-smoketest: "python ./ci/wheel_smoke_test_pylibraft.py"
   wheel-build-raft-dask:
     needs: wheel-tests-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
     with:
       build_type: pull-request
       package-name: raft_dask
       package-dir: python/raft-dask
-      before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft && python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl"
+      before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibraft_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibraft && python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl"
       skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
   wheel-tests-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
     with:
       build_type: pull-request
       package-name: raft_dask
       # Always want to test against latest dask/distributed.
-      test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
-      test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
+      test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
+      test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       test-unittest: "python -m pytest ./python/raft-dask/raft_dask/test"
       test-smoketest: "python ./ci/wheel_smoke_test_raft_dask.py"
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 17b2d6f83b..d389c4e2a9 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -32,19 +32,17 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       package-name: pylibraft
-      test-before-amd64: "pip install cupy-cuda11x"
-      test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64"
       test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
   wheel-tests-raft-dask:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@manylinux_v2
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
index efc8f0c77c..fd6c2f929e 100755
--- a/ci/release/apply_wheel_modifications.sh
+++ b/ci/release/apply_wheel_modifications.sh
@@ -18,3 +18,8 @@ sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/pylibraft/pyproject.toml
 sed -i "s/^name = \"raft-dask\"/name = \"raft-dask${CUDA_SUFFIX}\"/g" python/raft-dask/pyproject.toml
 sed -i "s/pylibraft/pylibraft${CUDA_SUFFIX}/g" python/raft-dask/pyproject.toml
 sed -i "s/ucx-py/ucx-py${CUDA_SUFFIX}/g" python/raft-dask/pyproject.toml
+
+if [[ $CUDA_SUFFIX == "-cu12" ]]; then
+    sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" python/pylibraft/pyproject.toml
+    sed -i "s/cupy-cuda11x/cupy-cuda12x/g" python/pylibraft/pyproject.toml
+fi
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 5ba52c4fad..52256b4266 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -13,7 +13,7 @@ dependencies:
 - clang=16.0.1
 - cmake>=3.23.1,!=3.25.0
 - cuda-profiler-api=11.8.86
-- cuda-python >=11.7.1,<12.0
+- cuda-python>=11.7.1,<12.0
 - cudatoolkit=11.8
 - cupy>=12.0.0
 - cxx-compiler
diff --git a/dependencies.yaml b/dependencies.yaml
index 630ba99c59..af747951d8 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -132,7 +132,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - &cuda_python cuda-python >=11.7.1,<12.0
+          - &cuda_python cuda-python>=11.7.1,<12.0
           - &rmm rmm==23.6.*
   checks:
     common:
diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml
index dc3095e2d1..4aa11b41ea 100644
--- a/python/pylibraft/pyproject.toml
+++ b/python/pylibraft/pyproject.toml
@@ -16,7 +16,7 @@
 
 requires = [
     "cmake>=3.23.1,!=3.25.0",
-    "cuda-python >=11.7.1,<12.0",
+    "cuda-python>=11.7.1,<12.0",
     "cython>=0.29,<0.30",
     "ninja",
     "rmm==23.6.*",
@@ -37,7 +37,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.9"
 dependencies = [
-    "cuda-python >=11.7.1,<12.0",
+    "cuda-python>=11.7.1,<12.0",
     "numpy>=1.21",
     "rmm==23.6.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

From 576b22fccdd19f430c7f02bd29b4bc69dd2a0bf7 Mon Sep 17 00:00:00 2001
From: Allard Hendriksen <ahendriksen@nvidia.com>
Date: Fri, 5 May 2023 23:19:57 +0200
Subject: [PATCH 43/78] Inline get_cache_idx (#1492)

Related to issue #1490.

The `get_cache_idx` kernel is currently defined in an anonymous namespace. In #1490, it was reported that this could lead to linker errors in CUDA 11.4 in combination with (presumably?) clang.

I tried to reproduce the linker error in commit 01cf4099b206b6b37dcdc84e852507306b018af5 but could not, as described in the commit message.

This PR attempts to fix the linker errors by marking `get_cache_idx` as inline and by removing the anonymous namespace.

Authors:
  - Allard Hendriksen (https://github.com/ahendriksen)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1492
---
 cpp/include/raft/util/cache_util.cuh | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/cpp/include/raft/util/cache_util.cuh b/cpp/include/raft/util/cache_util.cuh
index b20982473f..bbd84d8bf2 100644
--- a/cpp/include/raft/util/cache_util.cuh
+++ b/cpp/include/raft/util/cache_util.cuh
@@ -303,9 +303,6 @@ __global__ void assign_cache_idx(const int* keys,
   }
 }
 
-/* Unnamed namespace is used to avoid multiple definition error for the
-  following non-template function */
-namespace {
 /**
  * @brief Get the cache indices for keys stored in the cache.
  *
@@ -331,15 +328,15 @@ namespace {
  * @param [out] is_cached whether the element is cached size[n]
  * @param [in] time iteration counter (used for time stamping)
  */
-__global__ void get_cache_idx(int* keys,
-                              int n,
-                              int* cached_keys,
-                              int n_cache_sets,
-                              int associativity,
-                              int* cache_time,
-                              int* cache_idx,
-                              bool* is_cached,
-                              int time)
+__global__ inline void get_cache_idx(int* keys,
+                                     int n,
+                                     int* cached_keys,
+                                     int n_cache_sets,
+                                     int associativity,
+                                     int* cache_time,
+                                     int* cache_idx,
+                                     bool* is_cached,
+                                     int time)
 {
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
   if (tid < n) {
@@ -363,6 +360,5 @@ __global__ void get_cache_idx(int* keys,
     }
   }
 }
-};  // end unnamed namespace
 };  // namespace cache
 };  // namespace raft

From 8816910b7e72544f36e5128a88ec127be9116643 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Tue, 9 May 2023 09:34:35 -0700
Subject: [PATCH 44/78] Re-use memory pool between benchmark runs (#1495)

Don't recreate a new memory pool for each benchmark, and instead re-use the pool.

This significantly speeds up running the benchmarks that use a CUDA memory pool. As an example, running
`time ./cpp/build/MATRIX_BENCH --benchmark_filter=SelectK/float/uint32_t.*/0/`, which runs benchmarks for 9 different selection algorithms, takes `10.038s` with this change, down from `36.317s` on branch-23.06.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/raft/pull/1495
---
 cpp/bench/prims/common/benchmark.hpp | 13 ++++++++++++-
 cpp/bench/prims/matrix/select_k.cu   |  4 ++--
 cpp/bench/prims/neighbors/knn.cuh    |  5 ++---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/cpp/bench/prims/common/benchmark.hpp b/cpp/bench/prims/common/benchmark.hpp
index 4b6e1ba286..1e783eb338 100644
--- a/cpp/bench/prims/common/benchmark.hpp
+++ b/cpp/bench/prims/common/benchmark.hpp
@@ -113,8 +113,19 @@ class fixture {
   raft::device_resources handle;
   rmm::cuda_stream_view stream;
 
-  fixture() : stream{handle.get_stream()}
+  fixture(bool use_pool_memory_resource = false) : stream{handle.get_stream()}
   {
+    // Cache memory pool between test runs, since it is expensive to create.
+    // This speeds up the time required to run the select_k bench by over 3x.
+    // This is part of the fixture class here so that the pool will get cleaned
+    // up, rather than outliving the benchmarks that require it.
+    static std::unique_ptr<using_pool_memory_res> memory_pool;
+    if (use_pool_memory_resource) {
+      if (!memory_pool) { memory_pool.reset(new using_pool_memory_res()); }
+    } else if (memory_pool) {
+      memory_pool.reset();
+    }
+
     int l2_cache_size = 0;
     int device_id     = 0;
     RAFT_CUDA_TRY(cudaGetDevice(&device_id));
diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index 8e75280029..d0bc993cc1 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -42,7 +42,8 @@ using namespace raft::bench;  // NOLINT
 template <typename KeyT, typename IdxT, select::Algo Algo>
 struct selection : public fixture {
   explicit selection(const select::params& p)
-    : params_(p),
+    : fixture(true),
+      params_(p),
       in_dists_(p.batch_size * p.len, stream),
       in_ids_(p.batch_size * p.len, stream),
       out_dists_(p.batch_size * p.k, stream),
@@ -72,7 +73,6 @@ struct selection : public fixture {
   void run_benchmark(::benchmark::State& state) override  // NOLINT
   {
     device_resources handle{stream};
-    using_pool_memory_res res;
     try {
       std::ostringstream label_stream;
       label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k;
diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
index afb3bf9da3..8239fa4f89 100644
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ b/cpp/bench/prims/neighbors/knn.cuh
@@ -222,7 +222,8 @@ struct brute_force_knn {
 template <typename ValT, typename IdxT, typename ImplT>
 struct knn : public fixture {
   explicit knn(const params& p, const TransferStrategy& strategy, const Scope& scope)
-    : params_(p),
+    : fixture(true),
+      params_(p),
       strategy_(strategy),
       scope_(scope),
       dev_mem_res_(strategy == TransferStrategy::MANAGED),
@@ -274,8 +275,6 @@ struct knn : public fixture {
         "device (TransferStrategy::NO_COPY)");
     }
 
-    using_pool_memory_res default_resource;
-
     try {
       std::ostringstream label_stream;
       label_stream << params_ << "#" << strategy_ << "#" << scope_;

From f60fb79f2dc3c6944a36cb14142ae3823ebcc33d Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 9 May 2023 20:33:54 -0500
Subject: [PATCH 45/78] Update recipes to GTest version >=1.13.0 (#1501)

This PR updates GTest pinnings to >=1.13.0. This aligns with recent changes in rapids-cmake: https://github.com/rapidsai/rapids-cmake/pull/401.

Authors:
  - Bradley Dice (https://github.com/bdice)
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1501
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 ++
 conda/recipes/libraft/conda_build_config.yaml    | 2 +-
 cpp/test/linalg/eig.cu                           | 4 ++++
 cpp/test/linalg/svd.cu                           | 4 ++++
 dependencies.yaml                                | 8 ++++++++
 5 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 52256b4266..aae2aa3d15 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -24,7 +24,9 @@ dependencies:
 - distributed==2023.3.2.1
 - doxygen>=1.8.20
 - gcc_linux-64=11.*
+- gmock>=1.13.0
 - graphviz
+- gtest>=1.13.0
 - ipython
 - joblib>=0.11
 - libcublas-dev=11.11.3.6
diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml
index 2a66f213a7..bec773d26d 100644
--- a/conda/recipes/libraft/conda_build_config.yaml
+++ b/conda/recipes/libraft/conda_build_config.yaml
@@ -17,7 +17,7 @@ nccl_version:
   - ">=2.9.9"
 
 gtest_version:
-  - "=1.10.0"
+  - ">=1.13.0"
 
 glog_version:
   - ">=0.6.0"
diff --git a/cpp/test/linalg/eig.cu b/cpp/test/linalg/eig.cu
index 5229e99d20..99d6cd0a31 100644
--- a/cpp/test/linalg/eig.cu
+++ b/cpp/test/linalg/eig.cu
@@ -273,5 +273,9 @@ INSTANTIATE_TEST_SUITE_P(EigTests, EigTestVecJacobiF, ::testing::ValuesIn(inputs
 
 INSTANTIATE_TEST_SUITE_P(EigTests, EigTestVecJacobiD, ::testing::ValuesIn(inputsd2));
 
+INSTANTIATE_TEST_SUITE_P(EigTests, EigTestVecCompareF, ::testing::ValuesIn(inputsf2));
+
+INSTANTIATE_TEST_SUITE_P(EigTests, EigTestVecCompareD, ::testing::ValuesIn(inputsd2));
+
 }  // namespace linalg
 }  // namespace raft
diff --git a/cpp/test/linalg/svd.cu b/cpp/test/linalg/svd.cu
index bd66459962..c780476a5f 100644
--- a/cpp/test/linalg/svd.cu
+++ b/cpp/test/linalg/svd.cu
@@ -202,6 +202,10 @@ INSTANTIATE_TEST_SUITE_P(SvdTests, SvdTestLeftVecF, ::testing::ValuesIn(inputsf2
 
 INSTANTIATE_TEST_SUITE_P(SvdTests, SvdTestLeftVecD, ::testing::ValuesIn(inputsd2));
 
+INSTANTIATE_TEST_SUITE_P(SvdTests, SvdTestRightVecF, ::testing::ValuesIn(inputsf2));
+
+INSTANTIATE_TEST_SUITE_P(SvdTests, SvdTestRightVecD, ::testing::ValuesIn(inputsd2));
+
 // INSTANTIATE_TEST_SUITE_P(SvdTests, SvdTestRightVecF,
 // ::testing::ValuesIn(inputsf2));
 
diff --git a/dependencies.yaml b/dependencies.yaml
index af747951d8..ccaf3fe0d8 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -10,6 +10,7 @@ files:
       - build_pylibraft
       - cudatoolkit
       - develop
+      - test_libraft
       - docs
       - run_raft_dask
       - run_pylibraft
@@ -29,6 +30,7 @@ files:
     output: none
     includes:
       - cudatoolkit
+      - test_libraft
   test_python:
     output: none
     includes:
@@ -216,6 +218,12 @@ dependencies:
               - *libcusolver114
               - *libcusparse_dev114
               - *libcusparse114
+  test_libraft:
+    common:
+      - output_types: [conda]
+        packages:
+          - gtest>=1.13.0
+          - gmock>=1.13.0
   docs:
     common:
       - output_types: [conda]

From 20141120f6ae2fd9ea1ea0648507af04594921ec Mon Sep 17 00:00:00 2001
From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com>
Date: Wed, 10 May 2023 04:46:12 +0200
Subject: [PATCH 46/78] Un-scale output distances (#1499)

Reverts the scaling of input data and queries that normally happens for int8/uint8 types before distance computation.
Reuses the ivf-pq post-processing step for that.

Along the way, replaces the custom `fragment_scale` helper with the `utils::mapping` that is shared among several ANN methods.

Solves https://github.com/rapidsai/raft/issues/1457

Authors:
  - Artem M. Chirkin (https://github.com/achirkin)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1499
---
 .../neighbors/detail/cagra/cagra_search.cuh   | 19 +++++++++++++
 .../detail/cagra/compute_distance.hpp         |  6 ++--
 .../neighbors/detail/cagra/device_common.hpp  | 26 +----------------
 .../detail/cagra/search_multi_cta.cuh         |  5 +++-
 .../detail/cagra/search_multi_kernel.cuh      | 19 +++++++++----
 .../detail/cagra/search_single_cta.cuh        |  5 +++-
 .../raft/spatial/knn/detail/ann_utils.cuh     | 16 +++++++++--
 cpp/test/CMakeLists.txt                       |  2 ++
 cpp/test/neighbors/ann_cagra.cuh              |  6 +++-
 .../ann_cagra/test_int8_t_uint32_t.cu         | 28 +++++++++++++++++++
 .../ann_cagra/test_uint8_t_uint32_t.cu        | 28 +++++++++++++++++++
 11 files changed, 122 insertions(+), 38 deletions(-)
 create mode 100644 cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
 create mode 100644 cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu

diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
index 79cbb6198f..5902d1405f 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
@@ -16,6 +16,9 @@
 
 #pragma once
 
+#include <raft/neighbors/detail/ivf_pq_search.cuh>
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/device_resources.hpp>
 #include <raft/neighbors/cagra_types.hpp>
@@ -94,6 +97,22 @@ void search_main(raft::device_resources const& res,
             _num_executed_iterations,
             topk);
   }
+
+  static_assert(std::is_same_v<DistanceT, float>,
+                "only float distances are supported at the moment");
+  float* dist_out          = distances.data_handle();
+  const DistanceT* dist_in = distances.data_handle();
+  // We're converting the data from T to DistanceT during distance computation
+  // and divide the values by kDivisor. Here we restore the original scale.
+  constexpr float kScale = spatial::knn::detail::utils::config<T>::kDivisor /
+                           spatial::knn::detail::utils::config<DistanceT>::kDivisor;
+  ivf_pq::detail::postprocess_distances(dist_out,
+                                        dist_in,
+                                        index.metric(),
+                                        distances.extent(0),
+                                        distances.extent(1),
+                                        kScale,
+                                        res.get_stream());
 }
 /** @} */  // end group cagra
 
diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
index 29c841c0b5..52e5c62169 100644
--- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
+++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
@@ -15,6 +15,8 @@
  */
 #pragma once
 
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
 #include "device_common.hpp"
 #include "hashmap.hpp"
 #include "utils.hpp"
@@ -102,7 +104,7 @@ _RAFT_DEVICE void compute_distance_to_random_nodes(
             const uint32_t kv = k + v;
             // if (kv >= dataset_dim) break;
             DISTANCE_T diff = query_buffer[device::swizzling(kv)];
-            diff -= static_cast<float>(dl_buff[e].data[v]) * device::fragment_scale<DATA_T>();
+            diff -= spatial::knn::detail::utils::mapping<float>{}(dl_buff[e].data[v]);
             norm2 += diff * diff;
           }
         }
@@ -229,7 +231,7 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in
             const unsigned kv = k + v;
             diff              = query_buffer[device::swizzling(kv)];
           }
-          diff -= static_cast<float>(dl_buff[e].data[v]) * device::fragment_scale<DATA_T>();
+          diff -= spatial::knn::detail::utils::mapping<float>{}(dl_buff[e].data[v]);
           norm2 += diff * diff;
         }
       }
diff --git a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp
index 20f30d9f11..f9c81f3d25 100644
--- a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp
+++ b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp
@@ -27,30 +27,6 @@ namespace device {
 // warpSize for compile time calculation
 constexpr unsigned warp_size = 32;
 
-// scaling factor for distance computation
-template <class T>
-_RAFT_HOST_DEVICE constexpr float fragment_scale();
-template <>
-_RAFT_HOST_DEVICE constexpr float fragment_scale<float>()
-{
-  return 1.0;
-};
-template <>
-_RAFT_HOST_DEVICE constexpr float fragment_scale<half>()
-{
-  return 1.0;
-};
-template <>
-_RAFT_HOST_DEVICE constexpr float fragment_scale<uint8_t>()
-{
-  return 1.0 / 256.0;
-};
-template <>
-_RAFT_HOST_DEVICE constexpr float fragment_scale<int8_t>()
-{
-  return 1.0 / 128.0;
-};
-
 /** Xorshift rondem number generator.
  *
  * See https://en.wikipedia.org/wiki/Xorshift#xorshift for reference.
@@ -73,4 +49,4 @@ _RAFT_DEVICE inline T swizzling(T x)
 }
 
 }  // namespace device
-}  // namespace raft::neighbors::experimental::cagra::detail
\ No newline at end of file
+}  // namespace raft::neighbors::experimental::cagra::detail
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
index 6148441bd0..99553632ac 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 #pragma once
+
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
 #include <algorithm>
 #include <cassert>
 #include <iostream>
@@ -204,7 +207,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
   for (unsigned i = threadIdx.x; i < MAX_DATASET_DIM; i += BLOCK_SIZE) {
     unsigned j = device::swizzling(i);
     if (i < dataset_dim) {
-      query_buffer[j] = static_cast<float>(query_ptr[i]) * device::fragment_scale<DATA_T>();
+      query_buffer[j] = spatial::knn::detail::utils::mapping<float>{}(query_ptr[i]);
     } else {
       query_buffer[j] = 0.0;
     }
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
index 629bed2aee..e3e9c8a655 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 #pragma once
+
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
 #include <algorithm>
 #include <cassert>
 #include <iostream>
@@ -124,10 +127,12 @@ __global__ void random_pickup_kernel(
       random_data_frag, dataset_ptr + (dataset_dim * seed_index), dataset_dim);
 
     // Compute the norm of two data
-    const auto norm2 =
-      device::norm2<DISTANCE_T>(query_frag, random_data_frag, device::fragment_scale<DATA_T>()
-                                /*, scale*/
-      );
+    const auto norm2 = device::norm2<DISTANCE_T>(
+      query_frag,
+      random_data_frag,
+      static_cast<float>(1.0 / spatial::knn::detail::utils::config<DATA_T>::kDivisor)
+      /*, scale*/
+    );
 
     if (norm2 < best_norm2_team_local) {
       best_norm2_team_local = norm2;
@@ -335,8 +340,10 @@ __global__ void compute_distance_to_child_nodes_kernel(
     device::fragment<MAX_DATASET_DIM, DATA_T, TEAM_SIZE> frag_query;
     device::load_vector_sync(frag_query, query_ptr + blockIdx.y * data_dim, data_dim);
 
-    const auto norm2 =
-      device::norm2<DISTANCE_T>(frag_target, frag_query, device::fragment_scale<DATA_T>());
+    const auto norm2 = device::norm2<DISTANCE_T>(
+      frag_target,
+      frag_query,
+      static_cast<float>(1.0 / spatial::knn::detail::utils::config<DATA_T>::kDivisor));
 
     if (threadIdx.x % TEAM_SIZE == 0) {
       result_indices_ptr[ldd * blockIdx.y + global_team_id]   = child_id;
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
index fc87b952b0..531b30ba85 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 #pragma once
+
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
 #include <algorithm>
 #include <cassert>
 #include <iostream>
@@ -592,7 +595,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
   for (unsigned i = threadIdx.x; i < MAX_DATASET_DIM; i += BLOCK_SIZE) {
     unsigned j = device::swizzling(i);
     if (i < dataset_dim) {
-      query_buffer[j] = static_cast<float>(query_ptr[i]) * device::fragment_scale<DATA_T>();
+      query_buffer[j] = spatial::knn::detail::utils::mapping<float>{}(query_ptr[i]);
     } else {
       query_buffer[j] = 0.0;
     }
diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
index dd291251b4..850b741dfd 100644
--- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh
@@ -29,6 +29,8 @@
 #include <memory>
 #include <optional>
 
+#include <cuda_fp16.hpp>
+
 namespace raft::spatial::knn::detail::utils {
 
 /** Whether pointers are accessible on the device or on the host. */
@@ -136,12 +138,22 @@ struct with_mapped_memory_t {
 template <typename T>
 struct config {};
 
+template <>
+struct config<double> {
+  using value_t                    = double;
+  static constexpr double kDivisor = 1.0;
+};
 template <>
 struct config<float> {
   using value_t                    = float;
   static constexpr double kDivisor = 1.0;
 };
 template <>
+struct config<half> {
+  using value_t                    = half;
+  static constexpr double kDivisor = 1.0;
+};
+template <>
 struct config<uint8_t> {
   using value_t                    = uint32_t;
   static constexpr double kDivisor = 256.0;
@@ -169,13 +181,13 @@ struct mapping {
    * @{
    */
   template <typename S>
-  HDI auto operator()(const S& x) const -> std::enable_if_t<std::is_same_v<S, T>, T>
+  HDI constexpr auto operator()(const S& x) const -> std::enable_if_t<std::is_same_v<S, T>, T>
   {
     return x;
   };
 
   template <typename S>
-  HDI auto operator()(const S& x) const -> std::enable_if_t<!std::is_same_v<S, T>, T>
+  HDI constexpr auto operator()(const S& x) const -> std::enable_if_t<!std::is_same_v<S, T>, T>
   {
     constexpr double kMult = config<T>::kDivisor / config<S>::kDivisor;
     if constexpr (std::is_floating_point_v<S>) { return static_cast<T>(x * static_cast<S>(kMult)); }
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 7f45a6dd22..88ad7772c2 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -314,6 +314,8 @@ if(BUILD_TESTS)
     NEIGHBORS_TEST
     PATH
     test/neighbors/ann_cagra/test_float_uint32_t.cu
+    test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
+    test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
     test/neighbors/ann_ivf_flat/test_float_int64_t.cu
     test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
     test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh
index 8b8aa21fc9..f9df1f724f 100644
--- a/cpp/test/neighbors/ann_cagra.cuh
+++ b/cpp/test/neighbors/ann_cagra.cuh
@@ -82,6 +82,10 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
  protected:
   void testCagra()
   {
+    if (ps.dim * sizeof(DataT) % 8 != 0) {
+      GTEST_SKIP()
+        << "CAGRA requires the input data rows to be aligned at least to 8 bytes for now.";
+    }
     size_t queries_size = ps.n_queries * ps.k;
     std::vector<IdxT> indices_Cagra(queries_size);
     std::vector<IdxT> indices_naive(queries_size);
@@ -310,4 +314,4 @@ inline std::vector<AnnCagraInputs> generate_inputs()
 
 const std::vector<AnnCagraInputs> inputs = generate_inputs();
 
-}  // namespace raft::neighbors::experimental::cagra
\ No newline at end of file
+}  // namespace raft::neighbors::experimental::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
new file mode 100644
index 0000000000..f148ebc186
--- /dev/null
+++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../ann_cagra.cuh"
+
+namespace raft::neighbors::experimental::cagra {
+
+typedef AnnCagraTest<float, std::int8_t, std::uint32_t> AnnCagraTestI8;
+TEST_P(AnnCagraTestI8, AnnCagra) { this->testCagra(); }
+
+INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8, ::testing::ValuesIn(inputs));
+
+}  // namespace raft::neighbors::experimental::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
new file mode 100644
index 0000000000..087d7cec71
--- /dev/null
+++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../ann_cagra.cuh"
+
+namespace raft::neighbors::experimental::cagra {
+
+typedef AnnCagraTest<float, std::uint8_t, std::uint32_t> AnnCagraTestU8;
+TEST_P(AnnCagraTestU8, AnnCagra) { this->testCagra(); }
+
+INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8, ::testing::ValuesIn(inputs));
+
+}  // namespace raft::neighbors::experimental::cagra

From cc4a76ba19b4b01ca5cbf454365f5f48f4bf313c Mon Sep 17 00:00:00 2001
From: tsuki <12711693+enp1s0@users.noreply.github.com>
Date: Wed, 10 May 2023 23:24:37 +0900
Subject: [PATCH 47/78] CAGRA: Separate graph index sorting functionality from
 prune function (#1471)

# Changes

This PR separates the graph index sorting functionality from the CAGRA pruning function (`prune`) and moves it into a new function, `sort_knn_graph`. (Related issue: https://github.com/rapidsai/raft/issues/1446)

# Unit test

I have included a new unit test for the sorting function. The test utilizes a separate dataset from the one used in the CAGRA main test to avoid the effect of rounding errors during norm computation between two vectors in the dataset. More details are in the source code.
https://github.com/enp1s0/raft/blob/ea6c449c260895e9125a591a4848eed06f5b72c4/cpp/test/neighbors/ann_cagra.cuh#L93-L96

# Issue
Close #1446

Authors:
  - tsuki (https://github.com/enp1s0)
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1471
---
 cpp/include/raft/neighbors/cagra.cuh          |  65 +++++--
 .../neighbors/detail/cagra/graph_core.cuh     | 131 +++++++++-----
 cpp/test/neighbors/ann_cagra.cuh              | 169 +++++++++++++++++-
 .../ann_cagra/test_float_uint32_t.cu          |   4 +
 .../ann_cagra/test_int8_t_uint32_t.cu         |   3 +
 .../ann_cagra/test_uint8_t_uint32_t.cu        |   4 +
 6 files changed, 306 insertions(+), 70 deletions(-)

diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh
index 90728efd70..87d370b54a 100644
--- a/cpp/include/raft/neighbors/cagra.cuh
+++ b/cpp/include/raft/neighbors/cagra.cuh
@@ -52,8 +52,8 @@ namespace raft::neighbors::experimental::cagra {
  * @code{.cpp}
  *   using namespace raft::neighbors;
  *   // use default index parameters
- *   ivf_pq::index_params build_params;
- *   ivf_pq::search_params search_params
+ *   cagra::index_params build_params;
+ *   cagra::search_params search_params
  *   auto knn_graph      = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128);
  *   // create knn graph
  *   cagra::build_knn_graph(res, dataset, knn_graph.view(), 2, build_params, search_params);
@@ -84,6 +84,49 @@ void build_knn_graph(raft::device_resources const& res,
   detail::build_knn_graph(res, dataset, knn_graph, refine_rate, build_params, search_params);
 }
 
+/**
+ * @brief Sort a KNN graph index.
+ * Preprocessing step for `cagra::prune`: If a KNN graph is not built using
+ * `cagra::build_knn_graph`, then it is necessary to call this function before calling
+ * `cagra::prune`. If the graph is built by `cagra::build_knn_graph`, it is already sorted and you
+ * do not need to call this function.
+ *
+ * Usage example:
+ * @code{.cpp}
+ *   using namespace raft::neighbors;
+ *   cagra::index_params build_params;
+ *   auto knn_graph      = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128);
+ *   // build KNN graph not using `cagra::build_knn_graph`
+ *   // build(knn_graph, dataset, ...);
+ *   // sort graph index
+ *   cagra::sort_knn_graph(res, dataset.view(), knn_graph.view());
+ *   // prune graph
+ *   cagra::prune(res, dataset, knn_graph.view(), pruned_graph.view());
+ *   // Construct an index from dataset and pruned knn_graph
+ *   auto index = cagra::index<T, IdxT>(res, build_params.metric, dataset, pruned_graph.view());
+ * @endcode
+ *
+ * @tparam DataT type of the data in the source dataset
+ * @tparam IdxT type of the indices in the source dataset
+ *
+ * @param[in] res raft resources
+ * @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim]
+ * @param[in,out] knn_graph a matrix view (host or device) of the input knn graph [n_rows,
+ * knn_graph_degree]
+ */
+template <typename DataT,
+          typename IdxT = uint32_t,
+          typename d_accessor =
+            host_device_accessor<std::experimental::default_accessor<DataT>, memory_type::device>,
+          typename g_accessor =
+            host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
+void sort_knn_graph(raft::device_resources const& res,
+                    mdspan<const DataT, matrix_extent<IdxT>, row_major, d_accessor> dataset,
+                    mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph)
+{
+  detail::graph::sort_knn_graph(res, dataset, knn_graph);
+}
+
 /**
  * @brief Prune a KNN graph.
  *
@@ -91,27 +134,21 @@ void build_knn_graph(raft::device_resources const& res,
  *
  * See [cagra::build_knn_graph](#cagra::build_knn_graph) for usage example
  *
- * @tparam T data element type
  * @tparam IdxT type of the indices in the source dataset
  *
  * @param[in] res raft resources
- * @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim]
  * @param[in] knn_graph a matrix view (host or device) of the input knn graph [n_rows,
  * knn_graph_degree]
  * @param[out] new_graph a host matrix view of the pruned knn graph [n_rows, graph_degree]
  */
-template <class DATA_T,
-          typename IdxT = uint32_t,
-          typename d_accessor =
-            host_device_accessor<std::experimental::default_accessor<DATA_T>, memory_type::device>,
+template <typename IdxT = uint32_t,
           typename g_accessor =
-            host_device_accessor<std::experimental::default_accessor<DATA_T>, memory_type::host>>
+            host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
 void prune(raft::device_resources const& res,
-           mdspan<const DATA_T, matrix_extent<IdxT>, row_major, d_accessor> dataset,
            mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph,
            raft::host_matrix_view<IdxT, IdxT, row_major> new_graph)
 {
-  detail::graph::prune(res, dataset, knn_graph, new_graph);
+  detail::graph::prune(res, knn_graph, new_graph);
 }
 
 /**
@@ -138,11 +175,11 @@ void prune(raft::device_resources const& res,
  *   // create and fill the index from a [N, D] dataset
  *   auto index = cagra::build(res, index_params, dataset);
  *   // use default search parameters
- *   ivf_pq::search_params search_params;
+ *   cagra::search_params search_params;
  *   // search K nearest neighbours
  *   auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k);
  *   auto distances = raft::make_device_matrix<float>(res, n_queries, k);
- *   ivf_pq::search(res, search_params, index, queries, neighbors, distances);
+ *   cagra::search(res, search_params, index, queries, neighbors, distances);
  * @endcode
  *
  * @tparam T data element type
@@ -178,7 +215,7 @@ index<T, IdxT> build(raft::device_resources const& res,
 
   auto cagra_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), params.graph_degree);
 
-  prune<T, IdxT>(res, dataset, knn_graph.view(), cagra_graph.view());
+  prune<IdxT>(res, knn_graph.view(), cagra_graph.view());
 
   // Construct an index from dataset and pruned knn graph.
   return index<T, IdxT>(res, params.metric, dataset, cagra_graph.view());
diff --git a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
index 02055f2a4d..a08c83677b 100644
--- a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
@@ -405,36 +405,24 @@ void shift_array(T* array, uint64_t num)
   }
 }
 
-/** Input arrays can be both host and device*/
-template <class DATA_T,
+template <typename DataT,
           typename IdxT = uint32_t,
           typename d_accessor =
-            host_device_accessor<std::experimental::default_accessor<DATA_T>, memory_type::device>,
+            host_device_accessor<std::experimental::default_accessor<DataT>, memory_type::device>,
           typename g_accessor =
-            host_device_accessor<std::experimental::default_accessor<DATA_T>, memory_type::host>>
-void prune(raft::device_resources const& res,
-           mdspan<const DATA_T, matrix_extent<IdxT>, row_major, d_accessor> dataset,
-           mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph,
-           raft::host_matrix_view<IdxT, IdxT, row_major> new_graph)
+            host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
+void sort_knn_graph(raft::device_resources const& res,
+                    mdspan<const DataT, matrix_extent<IdxT>, row_major, d_accessor> dataset,
+                    mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph)
 {
-  RAFT_LOG_DEBUG(
-    "# Pruning kNN graph (size=%lu, degree=%lu)\n", knn_graph.extent(0), knn_graph.extent(1));
+  RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0),
+               "dataset size is expected to have the same number of graph index size");
+  const uint32_t dataset_size = dataset.extent(0);
+  const uint32_t dataset_dim  = dataset.extent(1);
+  const DataT* dataset_ptr    = dataset.data_handle();
 
-  RAFT_EXPECTS(
-    dataset.extent(0) == knn_graph.extent(0) && knn_graph.extent(0) == new_graph.extent(0),
-    "Each input array is expected to have the same number of rows");
-  RAFT_EXPECTS(new_graph.extent(1) <= knn_graph.extent(1),
-               "output graph cannot have more columns than input graph");
-  const uint32_t dataset_size        = dataset.extent(0);
-  const uint32_t dataset_dim         = dataset.extent(1);
-  const uint32_t input_graph_degree  = knn_graph.extent(1);
-  const uint32_t output_graph_degree = new_graph.extent(1);
-  const DATA_T* dataset_ptr          = dataset.data_handle();
-  uint32_t* input_graph_ptr          = (uint32_t*)knn_graph.data_handle();
-  uint32_t* output_graph_ptr         = new_graph.data_handle();
-  float scale                  = 1.0f / raft::spatial::knn::detail::utils::config<DATA_T>::kDivisor;
-  const std::size_t graph_size = dataset_size;
-  size_t array_size;
+  const uint32_t input_graph_degree = knn_graph.extent(1);
+  uint32_t* input_graph_ptr         = (uint32_t*)knn_graph.data_handle();
 
   // Setup GPUs
   int num_gpus = 0;
@@ -451,46 +439,48 @@ void prune(raft::device_resources const& res,
   }
   RAFT_CUDA_TRY(cudaSetDevice(0));
 
-  uint32_t graph_chunk_size     = graph_size;
-  uint32_t*** d_input_graph_ptr = NULL;  // [...][num_gpus][graph_chunk_size, input_graph_degree]
-  graph_chunk_size              = (graph_size + num_gpus - 1) / num_gpus;
+  const uint32_t graph_size       = knn_graph.extent(0);
+  uint32_t*** d_input_graph_ptr   = NULL;  // [...][num_gpus][graph_chunk_size, input_graph_degree]
+  const uint32_t graph_chunk_size = (graph_size + num_gpus - 1) / num_gpus;
   d_input_graph_ptr = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, input_graph_degree);
 
-  uint32_t dataset_chunk_size = dataset_size;
-  DATA_T*** d_dataset_ptr     = NULL;  // [num_gpus+1][...][...]
-  dataset_chunk_size          = (dataset_size + num_gpus - 1) / num_gpus;
+  DataT*** d_dataset_ptr            = NULL;  // [num_gpus+1][...][...]
+  const uint32_t dataset_chunk_size = (dataset_size + num_gpus - 1) / num_gpus;
   assert(dataset_chunk_size == graph_chunk_size);
-  d_dataset_ptr = mgpu_alloc<DATA_T>(num_gpus, dataset_chunk_size, dataset_dim);
+  d_dataset_ptr = mgpu_alloc<DataT>(num_gpus, dataset_chunk_size, dataset_dim);
 
-  mgpu_H2D<DATA_T>(
+  const float scale = 1.0f / raft::spatial::knn::detail::utils::config<DataT>::kDivisor;
+
+  mgpu_H2D<DataT>(
     d_dataset_ptr, dataset_ptr, num_gpus, dataset_size, dataset_chunk_size, dataset_dim);
 
-  //
-  // Sorting kNN graph
-  //
   double time_sort_start = cur_time();
   RAFT_LOG_DEBUG("# Sorting kNN Graph on GPUs ");
-  mgpu_H2D<uint32_t>(
-    d_input_graph_ptr, input_graph_ptr, num_gpus, graph_size, graph_chunk_size, input_graph_degree);
+  mgpu_H2D<uint32_t>(d_input_graph_ptr,
+                     input_graph_ptr,
+                     num_gpus,
+                     dataset_size,
+                     graph_chunk_size,
+                     input_graph_degree);
   void (*kernel_sort)(
-    DATA_T**, uint32_t, uint32_t, uint32_t, float, uint32_t**, uint32_t, uint32_t, uint32_t, int);
+    DataT**, uint32_t, uint32_t, uint32_t, float, uint32_t**, uint32_t, uint32_t, uint32_t, int);
   constexpr int numElementsPerThread = 4;
   dim3 threads_sort(1, 1, 1);
   if (input_graph_degree <= numElementsPerThread * 32) {
     constexpr int blockDim_x = 32;
-    kernel_sort              = kern_sort<DATA_T, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else if (input_graph_degree <= numElementsPerThread * 64) {
     constexpr int blockDim_x = 64;
-    kernel_sort              = kern_sort<DATA_T, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else if (input_graph_degree <= numElementsPerThread * 128) {
     constexpr int blockDim_x = 128;
-    kernel_sort              = kern_sort<DATA_T, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else if (input_graph_degree <= numElementsPerThread * 256) {
     constexpr int blockDim_x = 256;
-    kernel_sort              = kern_sort<DATA_T, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else {
     fprintf(stderr,
@@ -510,7 +500,7 @@ void prune(raft::device_resources const& res,
                                                dataset_dim,
                                                scale,
                                                d_input_graph_ptr[i_gpu],
-                                               graph_size,
+                                               dataset_size,
                                                graph_chunk_size,
                                                input_graph_degree,
                                                i_gpu);
@@ -518,13 +508,60 @@ void prune(raft::device_resources const& res,
   RAFT_CUDA_TRY(cudaSetDevice(0));
   RAFT_CUDA_TRY(cudaDeviceSynchronize());
   RAFT_LOG_DEBUG(".");
-  mgpu_D2H<uint32_t>(
-    d_input_graph_ptr, input_graph_ptr, num_gpus, graph_size, graph_chunk_size, input_graph_degree);
+  mgpu_D2H<uint32_t>(d_input_graph_ptr,
+                     input_graph_ptr,
+                     num_gpus,
+                     dataset_size,
+                     graph_chunk_size,
+                     input_graph_degree);
   RAFT_LOG_DEBUG("\n");
   double time_sort_end = cur_time();
   RAFT_LOG_DEBUG("# Sorting kNN graph time: %.1lf sec\n", time_sort_end - time_sort_start);
 
-  mgpu_free<DATA_T>(d_dataset_ptr, num_gpus);
+  mgpu_free<DataT>(d_dataset_ptr, num_gpus);
+}
+
+/** Input arrays can be both host and device*/
+template <typename IdxT = uint32_t,
+          typename g_accessor =
+            host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
+void prune(raft::device_resources const& res,
+           mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph,
+           raft::host_matrix_view<IdxT, IdxT, row_major> new_graph)
+{
+  RAFT_LOG_DEBUG(
+    "# Pruning kNN graph (size=%lu, degree=%lu)\n", knn_graph.extent(0), knn_graph.extent(1));
+
+  RAFT_EXPECTS(knn_graph.extent(0) == new_graph.extent(0),
+               "Each input array is expected to have the same number of rows");
+  RAFT_EXPECTS(new_graph.extent(1) <= knn_graph.extent(1),
+               "output graph cannot have more columns than input graph");
+  const uint32_t input_graph_degree  = knn_graph.extent(1);
+  const uint32_t output_graph_degree = new_graph.extent(1);
+  uint32_t* input_graph_ptr          = (uint32_t*)knn_graph.data_handle();
+  uint32_t* output_graph_ptr         = new_graph.data_handle();
+  const std::size_t graph_size       = new_graph.extent(0);
+  size_t array_size;
+
+  // Setup GPUs
+  int num_gpus = 0;
+
+  // Setup GPUs
+  RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus));
+  RAFT_LOG_DEBUG("# num_gpus: %d\n", num_gpus);
+  for (int self = 0; self < num_gpus; self++) {
+    RAFT_CUDA_TRY(cudaSetDevice(self));
+    for (int peer = 0; peer < num_gpus; peer++) {
+      if (self == peer) { continue; }
+      RAFT_CUDA_TRY(cudaDeviceEnablePeerAccess(peer, 0));
+    }
+  }
+  RAFT_CUDA_TRY(cudaSetDevice(0));
+
+  uint32_t graph_chunk_size     = graph_size;
+  uint32_t*** d_input_graph_ptr = NULL;  // [...][num_gpus][graph_chunk_size, input_graph_degree]
+  graph_chunk_size              = (graph_size + num_gpus - 1) / num_gpus;
+  d_input_graph_ptr = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, input_graph_degree);
 
   //
   uint8_t* detour_count;  // [graph_size, input_graph_degree]
diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh
index f9df1f724f..1096dc4fb0 100644
--- a/cpp/test/neighbors/ann_cagra.cuh
+++ b/cpp/test/neighbors/ann_cagra.cuh
@@ -40,6 +40,88 @@
 #include <vector>
 
 namespace raft::neighbors::experimental::cagra {
+namespace {
+// Test helper for sort_knn_graph: scrambles, in place, the column order within each row of
+// `index`. Each row draws its swap positions from xorshift64 starting at the row number.
+template <typename IdxT>
+void RandomSuffle(raft::host_matrix_view<IdxT, IdxT> index)
+{
+  using raft::neighbors::experimental::cagra::detail::device::xorshift64;
+  for (IdxT row = 0; row < index.extent(0); row++) {
+    IdxT* const entries = index.data_handle() + row * index.extent(1);
+    uint64_t state      = row;
+    // Perform one swap per column; the two positions come from consecutive generator steps.
+    for (unsigned n = 0; n < index.extent(1); n++) {
+      state          = xorshift64(state);
+      const auto lhs = state % index.extent(1);
+      state          = xorshift64(state);
+      const auto rhs = state % index.extent(1);
+      // lhs may equal rhs, in which case the swap is a no-op.
+      std::swap(entries[lhs], entries[rhs]);
+    }
+  }
+}
+
+// Checks that every row lists its neighbors in non-decreasing squared L2 distance order.
+template <typename DistanceT, typename DatatT, typename IdxT>
+testing::AssertionResult CheckOrder(raft::host_matrix_view<IdxT, IdxT> index_test,
+                                    raft::host_matrix_view<DatatT, IdxT> dataset)
+{
+  for (IdxT i = 0; i < index_test.extent(0); i++) {
+    const DatatT* const base_vec = dataset.data_handle() + i * dataset.extent(1);
+    const IdxT* const index_row  = index_test.data_handle() + i * index_test.extent(1);
+    DistanceT prev_distance      = 0;
+    for (unsigned j = 0; j < index_test.extent(1) - 1; j++) {  // last column is not examined
+      const DatatT* const target_vec = dataset.data_handle() + index_row[j] * dataset.extent(1);
+      DistanceT distance             = 0;
+      for (unsigned l = 0; l < dataset.extent(1); l++) {
+        const auto d = static_cast<DistanceT>(target_vec[l]) - static_cast<DistanceT>(base_vec[l]);
+        distance += d * d;
+      }
+      if (prev_distance > distance) {
+        return testing::AssertionFailure()
+               << "Wrong index order (row = " << i << ", neighbor_id = " << j
+               << "). (distance[neighbor_id-1] = " << prev_distance
+               << " should not be larger than distance[neighbor_id] = " << distance << ")";
+      }
+      prev_distance = distance;
+    }
+  }
+  return testing::AssertionSuccess();
+}
+
+// Converts the uint32_t values stored in `ptr` (by the caller, through a uint32_t* alias) into
+// floats equal to value / resolution, in place. The CAGRA index sorting test relies on this
+// dataset because rounding errors in the norm computation could alter the order of the index;
+// the generation method is based on the error-free transformation (EFT) method.
+__global__ void GenerateRoundingErrorFreeDataset_kernel(float* const ptr,
+                                                        const uint32_t size,
+                                                        const uint32_t resolution)
+{
+  const auto idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx < size) {
+    const uint32_t raw = *reinterpret_cast<const uint32_t*>(ptr + idx);
+    ptr[idx]           = static_cast<float>(raw) / resolution;
+  }
+}
+
+// Fills `ptr` with n_row * dim floats: `rng` writes integers (bounds 0 and resolution - 1) in
+// place on `cuda_stream`, then GenerateRoundingErrorFreeDataset_kernel divides by resolution.
+void GenerateRoundingErrorFreeDataset(float* const ptr,
+                                      const uint32_t n_row,
+                                      const uint32_t dim,
+                                      raft::random::Rng& rng,
+                                      cudaStream_t cuda_stream)
+{
+  const uint32_t n_elems    = n_row * dim;
+  const uint32_t resolution = 1u << static_cast<unsigned>(std::floor((24 - std::log2(dim)) / 2));
+  rng.uniformInt(reinterpret_cast<uint32_t*>(ptr), n_elems, 0u, resolution - 1, cuda_stream);
+  const uint32_t threads = 256;  // threads per block; the kernel handles one element per thread
+  const uint32_t blocks  = (n_elems + threads - 1) / threads;
+  GenerateRoundingErrorFreeDataset_kernel<<<blocks, threads, 0, cuda_stream>>>(
+    ptr, n_elems, resolution);
+}
+}  // namespace
 
 struct AnnCagraInputs {
   int n_queries;
@@ -107,7 +189,7 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
                                         stream_);
       update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
       update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
-      handle_.sync_stream(stream_);
+      handle_.sync_stream();
     }
 
     {
@@ -153,7 +235,7 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
 
         update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_);
         update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_);
-        handle_.sync_stream(stream_);
+        handle_.sync_stream();
       }
       // for (int i = 0; i < ps.n_queries; i++) {
       //   //  std::cout << "query " << i << std::end;
@@ -194,18 +276,18 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
     std::cout << "Done.\nRuning rng" << std::endl;
     raft::random::Rng r(1234ULL);
     if constexpr (std::is_same<DataT, float>{}) {
-      r.uniform(database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0), stream_);
-      r.uniform(search_queries.data(), ps.n_queries * ps.dim, DataT(0.1), DataT(2.0), stream_);
+      r.normal(database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0), stream_);
+      r.normal(search_queries.data(), ps.n_queries * ps.dim, DataT(0.1), DataT(2.0), stream_);
     } else {
       r.uniformInt(database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20), stream_);
       r.uniformInt(search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20), stream_);
     }
-    handle_.sync_stream(stream_);
+    handle_.sync_stream();
   }
 
   void TearDown() override
   {
-    handle_.sync_stream(stream_);
+    handle_.sync_stream();
     database.resize(0, stream_);
     search_queries.resize(0, stream_);
   }
@@ -218,6 +300,75 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
   rmm::device_uvector<DataT> search_queries;
 };
 
+// Fixture for cagra::sort_knn_graph: builds a kNN graph, scrambles each row, sorts it again,
+// and verifies the neighbor order with CheckOrder before the scramble and after the sort.
+template <typename DistanceT, typename DataT, typename IdxT>
+class AnnCagraSortTest : public ::testing::TestWithParam<AnnCagraInputs> {
+ public:
+  AnnCagraSortTest()
+    : ps(::testing::TestWithParam<AnnCagraInputs>::GetParam()), database(0, handle_.get_stream())
+  {
+  }
+
+ protected:
+  void testCagraSort()
+  {
+    // Device view of the dataset and a host copy (used by the host build path and CheckOrder).
+    // NOTE(review): the copy is only enqueued on the stream; no sync precedes the host build.
+    auto device_data = raft::make_device_matrix_view<const DataT, IdxT>(
+      static_cast<const DataT*>(database.data()), ps.n_rows, ps.dim);
+    auto host_data = raft::make_host_matrix<DataT, IdxT>(ps.n_rows, ps.dim);
+    raft::copy(host_data.data_handle(), database.data(), database.size(), handle_.get_stream());
+    auto host_data_view = raft::make_host_matrix_view<const DataT, IdxT>(
+      static_cast<const DataT*>(host_data.data_handle()), ps.n_rows, ps.dim);
+
+    // The graph is built with the default intermediate_graph_degree as its column count.
+    cagra::index_params params;
+    auto graph = raft::make_host_matrix<IdxT, IdxT>(ps.n_rows, params.intermediate_graph_degree);
+    if (ps.host_dataset) {
+      cagra::build_knn_graph<DataT, IdxT>(handle_, host_data_view, graph.view());
+    } else {
+      cagra::build_knn_graph<DataT, IdxT>(handle_, device_data, graph.view());
+    }
+    handle_.sync_stream();
+    ASSERT_TRUE(CheckOrder<DistanceT>(graph.view(), host_data.view()));
+
+    // Scramble the rows on the host, then let sort_knn_graph put them back in order.
+    RandomSuffle(graph.view());
+    cagra::sort_knn_graph(handle_, device_data, graph.view());
+    handle_.sync_stream();
+    ASSERT_TRUE(CheckOrder<DistanceT>(graph.view(), host_data.view()));
+  }
+
+  // Sizes the device dataset and fills it: rounding-error-free floats for DataT == float,
+  // otherwise uniformInt with bounds 1 and 20.
+  void SetUp() override
+  {
+    std::cout << "Resizing database: " << ps.n_rows * ps.dim << std::endl;
+    database.resize(static_cast<size_t>(ps.n_rows) * ps.dim, handle_.get_stream());
+    std::cout << "Done.\nRuning rng" << std::endl;
+    raft::random::Rng r(1234ULL);
+    if constexpr (std::is_same<DataT, float>{}) {
+      GenerateRoundingErrorFreeDataset(database.data(), ps.n_rows, ps.dim, r, handle_.get_stream());
+    } else {
+      r.uniformInt(database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20), handle_.get_stream());
+    }
+    handle_.sync_stream();
+  }
+
+  void TearDown() override
+  {
+    handle_.sync_stream();
+    database.resize(0, handle_.get_stream());
+  }
+
+ private:
+  // Declared first: the constructor initializes `database` from handle_.get_stream().
+  raft::device_resources handle_;
+  AnnCagraInputs ps;
+  rmm::device_uvector<DataT> database;
+};
+
 inline std::vector<AnnCagraInputs> generate_inputs()
 {
   // Todo(tfeher): MULTI_CTA tests a bug, consider disabling that mode.
@@ -238,7 +389,7 @@ inline std::vector<AnnCagraInputs> generate_inputs()
   auto inputs2 =
     raft::util::itertools::product<AnnCagraInputs>({100},
                                                    {1000},
-                                                   {2, 4, 8, 64, 128, 196, 256, 512, 1024},  // dim
+                                                   {8, 64, 128, 192, 256, 512, 1024},  // dim
                                                    {16},
                                                    {search_algo::AUTO},
                                                    {10},
@@ -282,7 +433,7 @@ inline std::vector<AnnCagraInputs> generate_inputs()
   inputs2 =
     raft::util::itertools::product<AnnCagraInputs>({100},
                                                    {10000, 20000},
-                                                   {30},
+                                                   {32},
                                                    {10},
                                                    {search_algo::AUTO},
                                                    {10},
@@ -297,7 +448,7 @@ inline std::vector<AnnCagraInputs> generate_inputs()
   inputs2 =
     raft::util::itertools::product<AnnCagraInputs>({100},
                                                    {10000, 20000},
-                                                   {30},
+                                                   {32},
                                                    {10},
                                                    {search_algo::AUTO},
                                                    {10},
diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
index 1497a515d2..adb44a9264 100644
--- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
@@ -23,6 +23,10 @@ namespace raft::neighbors::experimental::cagra {
 typedef AnnCagraTest<float, float, std::uint32_t> AnnCagraTestF;
 TEST_P(AnnCagraTestF, AnnCagra) { this->testCagra(); }
 
+typedef AnnCagraSortTest<float, float, std::uint32_t> AnnCagraSortTestF;
+TEST_P(AnnCagraSortTestF, AnnCagraSort) { this->testCagraSort(); }
+
 INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF, ::testing::ValuesIn(inputs));
 
 }  // namespace raft::neighbors::experimental::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
index f148ebc186..11c986c189 100644
--- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
@@ -22,7 +22,10 @@ namespace raft::neighbors::experimental::cagra {
 
 typedef AnnCagraTest<float, std::int8_t, std::uint32_t> AnnCagraTestI8;
 TEST_P(AnnCagraTestI8, AnnCagra) { this->testCagra(); }
+typedef AnnCagraSortTest<float, std::int8_t, std::uint32_t> AnnCagraSortTestI8;
+TEST_P(AnnCagraSortTestI8, AnnCagraSort) { this->testCagraSort(); }
 
 INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8, ::testing::ValuesIn(inputs));
 
 }  // namespace raft::neighbors::experimental::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
index 087d7cec71..51d4feeed2 100644
--- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
@@ -23,6 +23,10 @@ namespace raft::neighbors::experimental::cagra {
 typedef AnnCagraTest<float, std::uint8_t, std::uint32_t> AnnCagraTestU8;
 TEST_P(AnnCagraTestU8, AnnCagra) { this->testCagra(); }
 
+typedef AnnCagraSortTest<float, std::uint8_t, std::uint32_t> AnnCagraSortTestU8;
+TEST_P(AnnCagraSortTestU8, AnnCagraSort) { this->testCagraSort(); }
+
 INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8, ::testing::ValuesIn(inputs));
 
 }  // namespace raft::neighbors::experimental::cagra

From 1d1c5234f6d6b43f14f51a269c186f4b0ee1233a Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Wed, 10 May 2023 21:43:43 -0700
Subject: [PATCH 48/78] Generate dataset of select_k times (#1497)

This adds an optional flag (`--select_k_dataset`) to MATRIX_BENCH that enables a grid search of benchmarks over the different select_k algorithms. Since this adds about 100x as many benchmarks as before (90k vs 900), it is opt-in only for now. The results will be used to learn a heuristic function in #1455.

This also integrates the faiss block select top-k algorithm into this benchmark, so that we can compare how it performs against the other select_k algorithms.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1497
---
 cpp/CMakeLists.txt                            |  4 +
 cpp/bench/prims/CMakeLists.txt                | 12 ++-
 cpp/bench/prims/matrix/main.cpp               | 41 ++++++++
 cpp/bench/prims/matrix/select_k.cu            | 93 ++++++++++++++++++-
 .../neighbors/detail/selection_faiss-ext.cuh  |  6 ++
 .../raft_internal/matrix/select_k.cuh         |  9 +-
 .../detail/selection_faiss_00_generate.py     |  4 +
 .../detail/selection_faiss_int64_t_double.cu  | 44 +++++++++
 .../detail/selection_faiss_int64_t_half.cu    | 44 +++++++++
 .../detail/selection_faiss_uint32_t_double.cu | 44 +++++++++
 .../detail/selection_faiss_uint32_t_half.cu   | 44 +++++++++
 11 files changed, 337 insertions(+), 8 deletions(-)
 create mode 100644 cpp/bench/prims/matrix/main.cpp
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_int64_t_double.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_int64_t_half.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_uint32_t_double.cu
 create mode 100644 cpp/src/neighbors/detail/selection_faiss_uint32_t_half.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index cddfa4b38d..5fe02ec794 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -323,6 +323,10 @@ if(RAFT_COMPILE_LIBRARY)
     src/neighbors/detail/selection_faiss_size_t_double.cu
     src/neighbors/detail/selection_faiss_size_t_float.cu
     src/neighbors/detail/selection_faiss_uint32_t_float.cu
+    src/neighbors/detail/selection_faiss_int64_t_double.cu
+    src/neighbors/detail/selection_faiss_int64_t_half.cu
+    src/neighbors/detail/selection_faiss_uint32_t_double.cu
+    src/neighbors/detail/selection_faiss_uint32_t_half.cu
     src/neighbors/ivf_flat_build_float_int64_t.cu
     src/neighbors/ivf_flat_build_int8_t_int64_t.cu
     src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt
index 505ca32886..c90886841b 100644
--- a/cpp/bench/prims/CMakeLists.txt
+++ b/cpp/bench/prims/CMakeLists.txt
@@ -116,8 +116,16 @@ if(BUILD_PRIMS_BENCH)
   )
 
   ConfigureBench(
-    NAME MATRIX_BENCH PATH bench/prims/matrix/argmin.cu bench/prims/matrix/gather.cu
-    bench/prims/matrix/select_k.cu bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
+    NAME
+    MATRIX_BENCH
+    PATH
+    bench/prims/matrix/argmin.cu
+    bench/prims/matrix/gather.cu
+    bench/prims/matrix/select_k.cu
+    bench/prims/matrix/main.cpp
+    OPTIONAL
+    LIB
+    EXPLICIT_INSTANTIATE_ONLY
   )
 
   ConfigureBench(
diff --git a/cpp/bench/prims/matrix/main.cpp b/cpp/bench/prims/matrix/main.cpp
new file mode 100644
index 0000000000..9cdb1c2546
--- /dev/null
+++ b/cpp/bench/prims/matrix/main.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark/benchmark.h>
+#include <cstring>
+
+namespace raft::matrix {
+void add_select_k_dataset_benchmarks();
+}
+
+int main(int argc, char** argv)
+{
+  // "--select_k_dataset" is our own opt-in flag: it registers the extra select_k benchmarks.
+  for (int i = 1; i < argc; ++i) {
+    if (strcmp(argv[i], "--select_k_dataset") == 0) {
+      raft::matrix::add_select_k_dataset_benchmarks();
+      // Remove the flag before handing argv to google-benchmark. Shifting through index argc
+      // also moves the terminating null pointer, so argv[argc] stays null after argc--.
+      for (int j = i; j < argc; ++j)
+        argv[j] = argv[j + 1];
+      argc--;
+      break;
+    }
+  }
+  benchmark::Initialize(&argc, argv);
+  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;  // unknown flag left over
+  benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index d0bc993cc1..22ec998f4f 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -36,7 +36,6 @@
 #include <type_traits>
 
 namespace raft::matrix {
-
 using namespace raft::bench;  // NOLINT
 
 template <typename KeyT, typename IdxT, select::Algo Algo>
@@ -72,17 +71,16 @@ struct selection : public fixture {
 
   void run_benchmark(::benchmark::State& state) override  // NOLINT
   {
-    device_resources handle{stream};
     try {
       std::ostringstream label_stream;
       label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k;
       if (params_.use_same_leading_bits) { label_stream << "#same-leading-bits"; }
       state.SetLabel(label_stream.str());
-      loop_on_state(state, [this, &handle]() {
+      loop_on_state(state, [this]() {
         select::select_k_impl<KeyT, IdxT>(handle,
                                           Algo,
                                           in_dists_.data(),
-                                          in_ids_.data(),
+                                          params_.use_index_input ? in_ids_.data() : NULL,
                                           params_.batch_size,
                                           params_.len,
                                           params_.k,
@@ -182,4 +180,91 @@ SELECTION_REGISTER(double, int64_t, kWarpFiltered);           // NOLINT
 SELECTION_REGISTER(double, int64_t, kWarpDistributed);        // NOLINT
 SELECTION_REGISTER(double, int64_t, kWarpDistributedShm);     // NOLINT
 
+// The benchmarks below generate a dataset for learning a heuristic that picks the selection
+// algorithm. Generating that dataset adds a couple of constraints:
+// 1. These benchmarks must be optionally enabled from the commandline (opt-in only):
+//  there are thousands of them, and the run-time is non-trivial.
+// 2. Larger k values are tested, which won't work for all algorithms, so the input
+// parameters have to be filtered per algorithm.
+// This makes the registration code here different from the code above that registers the
+// other benchmarks. This macro registers one (KeyT, IdxT, algorithm, input) combination.
+#define SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, A, input)                               \
+  {                                                                                       \
+    using SelectK = selection<KeyT, IdxT, select::Algo::A>;                               \
+    std::stringstream name;                                                               \
+    name << "SelectKDataset/" << #KeyT "/" #IdxT "/" #A << "/" << input.batch_size << "/" \
+         << input.len << "/" << input.k << "/" << input.use_index_input;                  \
+    auto* b = ::benchmark::internal::RegisterBenchmarkInternal(                           \
+      new raft::bench::internal::Fixture<SelectK, select::params>(name.str(), input));    \
+    b->UseManualTime();                                                                   \
+    b->Unit(benchmark::kMillisecond);                                                     \
+  }
+
+const static size_t MAX_MEMORY = 16 * 1024 * 1024 * 1024ULL;  // 16 GiB cap on keys + indices
+
+// Registers `input` with every algorithm whose k limit it satisfies (radix has no k check).
+#define SELECTION_REGISTER_INPUT(KeyT, IdxT, input)                            \
+  {                                                                            \
+    size_t mem = input.batch_size * input.len * (sizeof(KeyT) + sizeof(IdxT)); \
+    if (mem < MAX_MEMORY) {                                                    \
+      SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kRadix8bits, input)            \
+      SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kRadix11bits, input)           \
+      SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kRadix11bitsExtraPass, input)  \
+      if (input.k <= raft::matrix::detail::select::warpsort::kMaxCapacity) {   \
+        SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kWarpImmediate, input)       \
+        SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kWarpFiltered, input)        \
+        SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kWarpDistributed, input)     \
+        SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kWarpDistributedShm, input)  \
+      }                                                                        \
+      if (input.k <= raft::neighbors::detail::kFaissMaxK<IdxT, KeyT>()) {      \
+        SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kFaissBlockSelect, input)    \
+      }                                                                        \
+    }                                                                          \
+  }
+
+// Registers the opt-in SelectKDataset benchmarks: a power-of-two grid plus 1024 random shapes.
+void add_select_k_dataset_benchmarks()
+{
+  const static bool select_min = true;
+  const static bool use_ids    = false;
+  const size_t grid_increment  = 1;
+
+  std::vector<int> k_vals;
+  for (size_t k_exp = 0; k_exp < 13; k_exp += grid_increment) {
+    k_vals.push_back(1 << k_exp);
+  }
+  // Add in values just past the limit for warp/faiss select
+  k_vals.push_back(257);
+  k_vals.push_back(2049);
+
+  // define a uniform grid (in log2 space) over the two shape dimensions, crossed with k_vals
+  std::vector<select::params> inputs;
+  for (size_t row_exp = 0; row_exp < 13; row_exp += grid_increment) {
+    for (size_t col_exp = 10; col_exp < 28; col_exp += grid_increment) {
+      for (auto k : k_vals) {
+        inputs.push_back(
+          select::params{size_t(1 << row_exp), size_t(1 << col_exp), k, select_min, use_ids});
+      }
+    }
+  }
+
+  // also add in some random values: each is 2^u with u drawn uniformly (engine seeded with 42)
+  std::default_random_engine engine(42);
+  std::uniform_real_distribution<> row_exp_dist(0, 13);
+  std::uniform_real_distribution<> col_exp_dist(10, 28);
+  std::uniform_real_distribution<> k_exp_dist(0, 13);
+  for (size_t i = 0; i < 1024; ++i) {
+    auto n_rows = static_cast<size_t>(pow(2, row_exp_dist(engine)));
+    auto n_cols = static_cast<size_t>(pow(2, col_exp_dist(engine)));
+    auto rand_k = static_cast<int>(pow(2, k_exp_dist(engine)));
+    inputs.push_back(select::params{n_rows, n_cols, rand_k, select_min, use_ids});
+  }
+
+  for (auto& entry : inputs) {
+    SELECTION_REGISTER_INPUT(double, int64_t, entry);
+    SELECTION_REGISTER_INPUT(double, uint32_t, entry);
+    SELECTION_REGISTER_INPUT(float, int64_t, entry);
+    SELECTION_REGISTER_INPUT(float, uint32_t, entry);
+  }
+}
 }  // namespace raft::matrix
diff --git a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh
index 8636ee9596..c000a4810b 100644
--- a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh
+++ b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh
@@ -18,6 +18,7 @@
 
 #include <cstddef>                                            // size_t
 #include <cstdint>                                            // uint32_t
+#include <cuda_fp16.h>                                        // __half
 #include <raft/neighbors/detail/selection_faiss_helpers.cuh>  // kFaissMaxK
 #include <raft/util/raft_explicit.hpp>                        // RAFT_EXPLICIT
 
@@ -58,4 +59,9 @@ instantiate_raft_neighbors_detail_select_k(size_t, double);
 instantiate_raft_neighbors_detail_select_k(int, double);
 instantiate_raft_neighbors_detail_select_k(size_t, float);
 
+instantiate_raft_neighbors_detail_select_k(uint32_t, double);
+instantiate_raft_neighbors_detail_select_k(int64_t, double);
+instantiate_raft_neighbors_detail_select_k(uint32_t, __half);
+instantiate_raft_neighbors_detail_select_k(int64_t, __half);
+
 #undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh
index 3d7a11e91e..1af3859ce7 100644
--- a/cpp/internal/raft_internal/matrix/select_k.cuh
+++ b/cpp/internal/raft_internal/matrix/select_k.cuh
@@ -20,6 +20,7 @@
 #include <raft/matrix/detail/select_radix.cuh>
 #include <raft/matrix/detail/select_warpsort.cuh>
 #include <raft/matrix/select_k.cuh>
+#include <raft/neighbors/detail/selection_faiss.cuh>
 
 namespace raft::matrix::select {
 
@@ -52,7 +53,8 @@ enum class Algo {
   kWarpImmediate,
   kWarpFiltered,
   kWarpDistributed,
-  kWarpDistributedShm
+  kWarpDistributedShm,
+  kFaissBlockSelect
 };
 
 inline auto operator<<(std::ostream& os, const Algo& algo) -> std::ostream&
@@ -67,6 +69,7 @@ inline auto operator<<(std::ostream& os, const Algo& algo) -> std::ostream&
     case Algo::kWarpFiltered: return os << "kWarpFiltered";
     case Algo::kWarpDistributed: return os << "kWarpDistributed";
     case Algo::kWarpDistributedShm: return os << "kWarpDistributedShm";
+    case Algo::kFaissBlockSelect: return os << "kFaissBlockSelect";
     default: return os << "unknown enum value";
   }
 }
@@ -154,7 +157,9 @@ void select_k_impl(const device_resources& handle,
       return detail::select::warpsort::
         select_k_impl<T, IdxT, detail::select::warpsort::warp_sort_distributed_ext>(
           in, in_idx, batch_size, len, k, out, out_idx, select_min, stream);
+    case Algo::kFaissBlockSelect:
+      return neighbors::detail::select_k(
+        in, in_idx, batch_size, len, out, out_idx, select_min, k, stream);
   }
 }
-
 }  // namespace raft::matrix::select
diff --git a/cpp/src/neighbors/detail/selection_faiss_00_generate.py b/cpp/src/neighbors/detail/selection_faiss_00_generate.py
index 36ba56c9b3..386dd18e0c 100644
--- a/cpp/src/neighbors/detail/selection_faiss_00_generate.py
+++ b/cpp/src/neighbors/detail/selection_faiss_00_generate.py
@@ -57,6 +57,10 @@
 
 types = dict(
     uint32_t_float=("uint32_t", "float"),
+    uint32_t_double=("uint32_t", "double"),
+    uint32_t_half=("uint32_t", "half"),
+    int64_t_double=("int64_t", "double"),
+    int64_t_half=("int64_t", "half"),
     int32_t_float=("int32_t", "float"),
     long_float=("long", "float"),
     size_t_double=("size_t", "double"),
diff --git a/cpp/src/neighbors/detail/selection_faiss_int64_t_double.cu b/cpp/src/neighbors/detail/selection_faiss_int64_t_double.cu
new file mode 100644
index 0000000000..f824fdd479
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_int64_t_double.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(int64_t, double);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_int64_t_half.cu b/cpp/src/neighbors/detail/selection_faiss_int64_t_half.cu
new file mode 100644
index 0000000000..34ca525c64
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_int64_t_half.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(int64_t, half);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_uint32_t_double.cu b/cpp/src/neighbors/detail/selection_faiss_uint32_t_double.cu
new file mode 100644
index 0000000000..e39edbb031
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_uint32_t_double.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(uint32_t, double);
+
+#undef instantiate_raft_neighbors_detail_select_k
diff --git a/cpp/src/neighbors/detail/selection_faiss_uint32_t_half.cu b/cpp/src/neighbors/detail/selection_faiss_uint32_t_half.cu
new file mode 100644
index 0000000000..549509f6da
--- /dev/null
+++ b/cpp/src/neighbors/detail/selection_faiss_uint32_t_half.cu
@@ -0,0 +1,44 @@
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: this file is generated by selection_faiss_00_generate.py
+ *
+ * Make changes there and run in this directory:
+ *
+ * > python selection_faiss_00_generate.py
+ *
+ */
+
+#include <cstddef>  // size_t
+#include <cstdint>  // uint32_t
+#include <raft/neighbors/detail/selection_faiss-inl.cuh>
+
+#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t)    \
+  template void raft::neighbors::detail::select_k(const key_t* inK,     \
+                                                  const payload_t* inV, \
+                                                  size_t n_rows,        \
+                                                  size_t n_cols,        \
+                                                  key_t* outK,          \
+                                                  payload_t* outV,      \
+                                                  bool select_min,      \
+                                                  int k,                \
+                                                  cudaStream_t stream)
+
+instantiate_raft_neighbors_detail_select_k(uint32_t, half);
+
+#undef instantiate_raft_neighbors_detail_select_k

From 2e73bda62ec1045489be989c0ecaaf883ae19dad Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Thu, 11 May 2023 02:23:55 -0400
Subject: [PATCH 49/78] Workaround for cuda 12 issue in cusparse (#1508)

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Divye Gala (https://github.com/divyegala)
  - Ben Frederickson (https://github.com/benfred)

URL: https://github.com/rapidsai/raft/pull/1508
---
 cpp/include/raft/spectral/detail/matrix_wrappers.hpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
index 73518e20ef..7128bfae32 100644
--- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
+++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
@@ -223,8 +223,11 @@ struct sparse_matrix_t {
     cusparseDnVecDescr_t vecX;
     RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x));
 
+    rmm::device_uvector<value_type> y_tmp(size_y, stream);
+    raft::copy(y_tmp.data(), y, size_y, stream);
+
     cusparseDnVecDescr_t vecY;
-    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y));
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y_tmp.data()));
 
     // get (scratch) external device buffer size:
     //
@@ -241,6 +244,8 @@ struct sparse_matrix_t {
     RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(
       cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream));
 
+    // FIXME: This is a workaround for a cusparse issue being encountered in CUDA 12
+    raft::copy(y, y_tmp.data(), size_y, stream);
     // free descriptors:
     //(TODO: maybe wrap them in a RAII struct?)
     //

From 6b94e4fd4de09f50527c172566f3433af69cb26b Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 11 May 2023 11:31:23 -0700
Subject: [PATCH 50/78] Remove raft/matrix/matrix.cuh includes (#1498)

The `raft/matrix/matrix.cuh` file has been marked as deprecated and produces a compile warning when included. However, it was still being referenced in many places within raft, making it hard to avoid these warnings.

Remove the includes in favour of either the newer APIs or, in certain cases, the detail API.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1498
---
 .../raft/cluster/detail/kmeans_balanced.cuh   |  1 -
 cpp/include/raft/linalg/detail/eig.cuh        | 32 +++++++----
 cpp/include/raft/linalg/detail/lstsq.cuh      |  1 -
 .../raft/linalg/detail/matrix_vector_op.cuh   | 53 ++++++++++++++----
 cpp/include/raft/linalg/detail/qr.cuh         |  7 ++-
 cpp/include/raft/linalg/detail/rsvd.cuh       | 54 ++++++++++---------
 cpp/include/raft/linalg/detail/svd.cuh        | 31 +++++++----
 cpp/include/raft/linalg/matrix_vector_op.cuh  |  3 +-
 cpp/include/raft/matrix/copy.cuh              | 20 ++++++-
 cpp/include/raft/matrix/diagonal.cuh          | 17 +++---
 cpp/include/raft/matrix/init.cuh              |  1 -
 cpp/include/raft/matrix/linewise_op.cuh       |  4 +-
 cpp/include/raft/matrix/print.cuh             |  1 -
 cpp/include/raft/matrix/sign_flip.cuh         |  1 -
 cpp/include/raft/matrix/slice.cuh             |  2 +-
 cpp/include/raft/matrix/sqrt.cuh              |  1 -
 cpp/include/raft/matrix/triangular.cuh        | 13 +++--
 .../raft/neighbors/detail/ivf_pq_build.cuh    |  8 ++-
 cpp/include/raft/neighbors/refine-inl.cuh     |  1 -
 .../raft/random/detail/make_regression.cuh    |  7 ++-
 .../raft/sparse/neighbors/detail/knn.cuh      |  1 -
 .../raft/spatial/knn/detail/ball_cover.cuh    | 20 +++----
 cpp/test/linalg/svd.cu                        |  1 -
 cpp/test/matrix/matrix.cu                     |  2 +-
 cpp/test/neighbors/ann_utils.cuh              |  9 ++--
 25 files changed, 191 insertions(+), 100 deletions(-)

diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
index eb89ebe402..9e5f7a7c9a 100644
--- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
@@ -37,7 +37,6 @@
 #include <raft/linalg/unary_op.cuh>
 #include <raft/matrix/argmin.cuh>
 #include <raft/matrix/gather.cuh>
-#include <raft/matrix/matrix.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/device_atomics.cuh>
 #include <raft/util/integer_utils.hpp>
diff --git a/cpp/include/raft/linalg/detail/eig.cuh b/cpp/include/raft/linalg/detail/eig.cuh
index 94493efb24..7896136631 100644
--- a/cpp/include/raft/linalg/detail/eig.cuh
+++ b/cpp/include/raft/linalg/detail/eig.cuh
@@ -19,7 +19,7 @@
 #include "cusolver_wrappers.hpp"
 #include <cuda_runtime_api.h>
 #include <raft/core/device_resources.hpp>
-#include <raft/matrix/matrix.cuh>
+#include <raft/matrix/copy.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
@@ -52,7 +52,9 @@ void eigDC_legacy(raft::device_resources const& handle,
   rmm::device_uvector<math_t> d_work(lwork, stream);
   rmm::device_scalar<int> d_dev_info(stream);
 
-  raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream);
+  raft::matrix::copy(handle,
+                     make_device_matrix_view<const math_t>(in, n_rows, n_cols),
+                     make_device_matrix_view<math_t>(eig_vectors, n_rows, n_cols));
 
   RAFT_CUSOLVER_TRY(cusolverDnsyevd(cusolverH,
                                     CUSOLVER_EIG_MODE_VECTOR,
@@ -108,7 +110,9 @@ void eigDC(raft::device_resources const& handle,
   rmm::device_scalar<int> d_dev_info(stream);
   std::vector<math_t> h_work(workspaceHost / sizeof(math_t));
 
-  raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream);
+  raft::matrix::copy(handle,
+                     make_device_matrix_view<const math_t>(in, n_rows, n_cols),
+                     make_device_matrix_view<math_t>(eig_vectors, n_rows, n_cols));
 
   RAFT_CUSOLVER_TRY(cusolverDnxsyevd(cusolverH,
                                      dn_params,
@@ -191,7 +195,9 @@ void eigSelDC(raft::device_resources const& handle,
                                        stream));
   } else if (memUsage == COPY_INPUT) {
     d_eig_vectors.resize(n_rows * n_cols, stream);
-    raft::matrix::copy(in, d_eig_vectors.data(), n_rows, n_cols, stream);
+    raft::matrix::copy(handle,
+                       make_device_matrix_view<const math_t>(in, n_rows, n_cols),
+                       make_device_matrix_view(eig_vectors, n_rows, n_cols));
 
     RAFT_CUSOLVER_TRY(cusolverDnsyevdx(cusolverH,
                                        CUSOLVER_EIG_MODE_VECTOR,
@@ -220,10 +226,16 @@ void eigSelDC(raft::device_resources const& handle,
          "This usually occurs when some of the features do not vary enough.");
 
   if (memUsage == OVERWRITE_INPUT) {
-    raft::matrix::truncZeroOrigin(in, n_rows, eig_vectors, n_rows, n_eig_vals, stream);
+    raft::matrix::trunc_zero_origin(
+      handle,
+      make_device_matrix_view<const math_t, size_t, col_major>(in, n_rows, n_eig_vals),
+      make_device_matrix_view<math_t, size_t, col_major>(eig_vectors, n_rows, n_eig_vals));
   } else if (memUsage == COPY_INPUT) {
-    raft::matrix::truncZeroOrigin(
-      d_eig_vectors.data(), n_rows, eig_vectors, n_rows, n_eig_vals, stream);
+    raft::matrix::trunc_zero_origin(
+      handle,
+      make_device_matrix_view<const math_t, size_t, col_major>(
+        d_eig_vectors.data(), n_rows, n_eig_vals),
+      make_device_matrix_view<math_t, size_t, col_major>(eig_vectors, n_rows, n_eig_vals));
   }
 }
 
@@ -259,7 +271,9 @@ void eigJacobi(raft::device_resources const& handle,
   rmm::device_uvector<math_t> d_work(lwork, stream);
   rmm::device_scalar<int> dev_info(stream);
 
-  raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream);
+  raft::matrix::copy(handle,
+                     make_device_matrix_view<const math_t>(in, n_rows, n_cols),
+                     make_device_matrix_view(eig_vectors, n_rows, n_cols));
 
   RAFT_CUSOLVER_TRY(cusolverDnsyevj(cusolverH,
                                     CUSOLVER_EIG_MODE_VECTOR,
@@ -283,4 +297,4 @@ void eigJacobi(raft::device_resources const& handle,
 
 }  // namespace detail
 }  // namespace linalg
-}  // namespace raft
\ No newline at end of file
+}  // namespace raft
diff --git a/cpp/include/raft/linalg/detail/lstsq.cuh b/cpp/include/raft/linalg/detail/lstsq.cuh
index 207bcefc32..fd6b00f9fd 100644
--- a/cpp/include/raft/linalg/detail/lstsq.cuh
+++ b/cpp/include/raft/linalg/detail/lstsq.cuh
@@ -28,7 +28,6 @@
 #include <raft/linalg/svd.cuh>
 #include <raft/linalg/transpose.cuh>
 #include <raft/matrix/math.cuh>
-#include <raft/matrix/matrix.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
diff --git a/cpp/include/raft/linalg/detail/matrix_vector_op.cuh b/cpp/include/raft/linalg/detail/matrix_vector_op.cuh
index 62ec9bb7a4..0c1261261c 100644
--- a/cpp/include/raft/linalg/detail/matrix_vector_op.cuh
+++ b/cpp/include/raft/linalg/detail/matrix_vector_op.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <raft/matrix/matrix.cuh>
+#include <raft/matrix/linewise_op.cuh>
 
 namespace raft {
 namespace linalg {
@@ -33,10 +33,26 @@ void matrixVectorOp(MatT* out,
                     Lambda op,
                     cudaStream_t stream)
 {
-  IdxType stride = rowMajor ? D : N;
-  IdxType nLines = rowMajor ? N : D;
-  return matrix::linewiseOp(
-    out, matrix, stride, nLines, rowMajor == bcastAlongRows, op, stream, vec);
+  raft::device_resources handle(stream);
+
+  bool along_lines = rowMajor == bcastAlongRows;
+  if (rowMajor) {
+    matrix::linewise_op<MatT, IdxType, row_major, Lambda>(
+      handle,
+      make_device_matrix_view<const MatT, IdxType, row_major>(matrix, N, D),
+      make_device_matrix_view<MatT, IdxType, row_major>(out, N, D),
+      along_lines,
+      op,
+      make_device_vector_view<const VecT, IdxType>(vec, bcastAlongRows ? N : D));
+  } else {
+    matrix::linewise_op<MatT, IdxType, col_major, Lambda>(
+      handle,
+      make_device_matrix_view<const MatT, IdxType, col_major>(matrix, N, D),
+      make_device_matrix_view<MatT, IdxType, col_major>(out, N, D),
+      along_lines,
+      op,
+      make_device_vector_view<const VecT, IdxType>(vec, bcastAlongRows ? N : D));
+  }
 }
 
 template <typename MatT,
@@ -56,10 +72,27 @@ void matrixVectorOp(MatT* out,
                     Lambda op,
                     cudaStream_t stream)
 {
-  IdxType stride = rowMajor ? D : N;
-  IdxType nLines = rowMajor ? N : D;
-  return matrix::linewiseOp(
-    out, matrix, stride, nLines, rowMajor == bcastAlongRows, op, stream, vec1, vec2);
+  raft::device_resources handle(stream);
+  bool along_lines = rowMajor == bcastAlongRows;
+  if (rowMajor) {
+    matrix::linewise_op<MatT, IdxType, row_major, Lambda>(
+      handle,
+      make_device_matrix_view<const MatT, IdxType, row_major>(matrix, N, D),
+      make_device_matrix_view<MatT, IdxType, row_major>(out, N, D),
+      along_lines,
+      op,
+      make_device_vector_view<const Vec1T, IdxType>(vec1, bcastAlongRows ? N : D),
+      make_device_vector_view<const Vec2T, IdxType>(vec2, bcastAlongRows ? N : D));
+  } else {
+    matrix::linewise_op<MatT, IdxType, col_major, Lambda>(
+      handle,
+      make_device_matrix_view<const MatT, IdxType, col_major>(matrix, N, D),
+      make_device_matrix_view<MatT, IdxType, col_major>(out, N, D),
+      along_lines,
+      op,
+      make_device_vector_view<const Vec1T, IdxType>(vec1, bcastAlongRows ? N : D),
+      make_device_vector_view<const Vec2T, IdxType>(vec2, bcastAlongRows ? N : D));
+  }
 }
 
 };  // end namespace detail
diff --git a/cpp/include/raft/linalg/detail/qr.cuh b/cpp/include/raft/linalg/detail/qr.cuh
index bc7c551d89..16a721dfd3 100644
--- a/cpp/include/raft/linalg/detail/qr.cuh
+++ b/cpp/include/raft/linalg/detail/qr.cuh
@@ -20,7 +20,7 @@
 #include "cusolver_wrappers.hpp"
 #include <raft/core/resource/cusolver_dn_handle.hpp>
 #include <raft/core/resources.hpp>
-#include <raft/matrix/matrix.cuh>
+#include <raft/matrix/triangular.cuh>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -132,7 +132,10 @@ void qrGetQR(raft::resources const& handle,
                                     devInfo.data(),
                                     stream));
 
-  raft::matrix::copyUpperTriangular(R_full.data(), R, m, n, stream);
+  raft::matrix::upper_triangular<math_t, int>(
+    handle,
+    make_device_matrix_view<const math_t, int, col_major>(R_full.data(), m, n),
+    make_device_matrix_view<math_t, int, col_major>(R, std::min(m, n), std::min(m, n)));
 
   RAFT_CUDA_TRY(
     cudaMemcpyAsync(Q, R_full.data(), sizeof(math_t) * m * n, cudaMemcpyDeviceToDevice, stream));
diff --git a/cpp/include/raft/linalg/detail/rsvd.cuh b/cpp/include/raft/linalg/detail/rsvd.cuh
index a66a23179b..48b9e1d2db 100644
--- a/cpp/include/raft/linalg/detail/rsvd.cuh
+++ b/cpp/include/raft/linalg/detail/rsvd.cuh
@@ -21,8 +21,11 @@
 #include <raft/linalg/qr.cuh>
 #include <raft/linalg/svd.cuh>
 #include <raft/linalg/transpose.cuh>
+#include <raft/matrix/diagonal.cuh>
 #include <raft/matrix/math.cuh>
-#include <raft/matrix/matrix.cuh>
+#include <raft/matrix/reverse.cuh>
+#include <raft/matrix/slice.cuh>
+#include <raft/matrix/triangular.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
 
@@ -202,15 +205,13 @@ void rsvdFixedRank(raft::device_resources const& handle,
                           true,
                           true,
                           stream);
-    raft::matrix::sliceMatrix(S_vec_tmp.data(),
-                              1,
-                              l,
-                              S_vec,
-                              0,
-                              0,
-                              1,
-                              k,
-                              stream);  // First k elements of S_vec
+
+    // First k elements of S_vec
+    raft::matrix::slice(
+      handle,
+      make_device_matrix_view<const math_t, int, col_major>(S_vec_tmp.data(), 1, l),
+      make_device_matrix_view<math_t, int, col_major>(S_vec, 1, k),
+      raft::matrix::slice_coordinates(0, 0, 1, k));
 
     // Merge step 14 & 15 by calculating U = Q*Vhat[:,1:k] mxl * lxk = mxk
     if (gen_left_vec) {
@@ -272,23 +273,26 @@ void rsvdFixedRank(raft::device_resources const& handle,
     RAFT_CUDA_TRY(cudaMemsetAsync(Uhat.data(), 0, sizeof(math_t) * l * l, stream));
     rmm::device_uvector<math_t> Uhat_dup(l * l, stream);
     RAFT_CUDA_TRY(cudaMemsetAsync(Uhat_dup.data(), 0, sizeof(math_t) * l * l, stream));
-    raft::matrix::copyUpperTriangular(BBt.data(), Uhat_dup.data(), l, l, stream);
+
+    raft::matrix::upper_triangular(
+      handle,
+      make_device_matrix_view<const math_t, int, col_major>(BBt.data(), l, l),
+      make_device_matrix_view<math_t, int, col_major>(Uhat_dup.data(), l, l));
+
     if (use_jacobi)
       raft::linalg::eigJacobi(
         handle, Uhat_dup.data(), l, l, Uhat.data(), S_vec_tmp.data(), stream, tol, max_sweeps);
     else
       raft::linalg::eigDC(handle, Uhat_dup.data(), l, l, Uhat.data(), S_vec_tmp.data(), stream);
     raft::matrix::seqRoot(S_vec_tmp.data(), l, stream);
-    raft::matrix::sliceMatrix(S_vec_tmp.data(),
-                              1,
-                              l,
-                              S_vec,
-                              0,
-                              p,
-                              1,
-                              l,
-                              stream);  // Last k elements of S_vec
-    raft::matrix::colReverse(S_vec, 1, k, stream);
+
+    auto S_vec_view = make_device_matrix_view<math_t, int, col_major>(S_vec, 1, k);
+    raft::matrix::slice(
+      handle,
+      raft::make_device_matrix_view<const math_t, int, col_major>(S_vec_tmp.data(), 1, l),
+      S_vec_view,
+      raft::matrix::slice_coordinates(0, p, 1, l));  // Last k elements of S_vec
+    raft::matrix::col_reverse(handle, S_vec_view);
 
     // Merge step 14 & 15 by calculating U = Q*Uhat[:,(p+1):l] mxl * lxk = mxk
     if (gen_left_vec) {
@@ -305,7 +309,7 @@ void rsvdFixedRank(raft::device_resources const& handle,
                          alpha,
                          beta,
                          stream);
-      raft::matrix::colReverse(U, m, k, stream);
+      raft::matrix::col_reverse(handle, make_device_matrix_view<math_t, int, col_major>(U, m, k));
     }
 
     // Merge step 14 & 15 by calculating V = B^T Uhat[:,(p+1):l] *
@@ -316,7 +320,9 @@ void rsvdFixedRank(raft::device_resources const& handle,
       rmm::device_uvector<math_t> UhatSinv(l * k, stream);
       RAFT_CUDA_TRY(cudaMemsetAsync(UhatSinv.data(), 0, sizeof(math_t) * l * k, stream));
       raft::matrix::reciprocal(S_vec_tmp.data(), l, stream);
-      raft::matrix::initializeDiagonalMatrix(S_vec_tmp.data() + p, Sinv.data(), k, k, stream);
+      raft::matrix::set_diagonal(handle,
+                                 make_device_vector_view<const math_t>(S_vec_tmp.data() + p, k),
+                                 make_device_matrix_view<math_t>(Sinv.data(), k, k));
 
       raft::linalg::gemm(handle,
                          Uhat.data() + p * l,
@@ -344,7 +350,7 @@ void rsvdFixedRank(raft::device_resources const& handle,
                          alpha,
                          beta,
                          stream);
-      raft::matrix::colReverse(V, n, k, stream);
+      raft::matrix::col_reverse(handle, make_device_matrix_view<math_t, int, col_major>(V, n, k));
     }
   }
 }
diff --git a/cpp/include/raft/linalg/detail/svd.cuh b/cpp/include/raft/linalg/detail/svd.cuh
index 998bea5b1b..94cd9e2789 100644
--- a/cpp/include/raft/linalg/detail/svd.cuh
+++ b/cpp/include/raft/linalg/detail/svd.cuh
@@ -24,8 +24,10 @@
 
 #include <raft/common/nvtx.hpp>
 #include <raft/core/device_resources.hpp>
+#include <raft/matrix/diagonal.cuh>
 #include <raft/matrix/math.cuh>
-#include <raft/matrix/matrix.cuh>
+#include <raft/matrix/norm.cuh>
+#include <raft/matrix/reverse.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_scalar.hpp>
@@ -139,8 +141,10 @@ void svdEig(raft::device_resources const& handle,
 
   raft::linalg::eigDC(handle, in_cross_mult.data(), n_cols, n_cols, V, S, stream);
 
-  raft::matrix::colReverse(V, n_cols, n_cols, stream);
-  raft::matrix::rowReverse(S, n_cols, idx_t(1), stream);
+  raft::matrix::col_reverse(handle,
+                            make_device_matrix_view<math_t, idx_t, col_major>(V, n_cols, n_cols));
+  raft::matrix::row_reverse(handle,
+                            make_device_matrix_view<math_t, idx_t, col_major>(S, n_cols, idx_t(1)));
 
   raft::matrix::seqRoot(S, S, alpha, n_cols, stream, true);
 
@@ -285,15 +289,19 @@ bool evaluateSVDByL2Norm(raft::device_resources const& handle,
   RAFT_CUDA_TRY(cudaMemsetAsync(P_d.data(), 0, sizeof(math_t) * m * n, stream));
   RAFT_CUDA_TRY(cudaMemsetAsync(S_mat.data(), 0, sizeof(math_t) * k * k, stream));
 
-  raft::matrix::initializeDiagonalMatrix(S_vec, S_mat.data(), k, k, stream);
+  raft::matrix::set_diagonal(handle,
+                             make_device_vector_view<const math_t>(S_vec, k),
+                             make_device_matrix_view<math_t>(S_mat.data(), k, k));
   svdReconstruction(handle, U, S_mat.data(), V, P_d.data(), m, n, k, stream);
 
   // get norms of each
-  math_t normA = raft::matrix::getL2Norm(handle, A_d, m * n, stream);
-  math_t normU = raft::matrix::getL2Norm(handle, U, m * k, stream);
-  math_t normS = raft::matrix::getL2Norm(handle, S_mat.data(), k * k, stream);
-  math_t normV = raft::matrix::getL2Norm(handle, V, n * k, stream);
-  math_t normP = raft::matrix::getL2Norm(handle, P_d.data(), m * n, stream);
+  math_t normA = raft::matrix::l2_norm(handle, make_device_matrix_view<const math_t>(A_d, m, n));
+  math_t normU = raft::matrix::l2_norm(handle, make_device_matrix_view<const math_t>(U, m, k));
+  math_t normS =
+    raft::matrix::l2_norm(handle, make_device_matrix_view<const math_t>(S_mat.data(), k, k));
+  math_t normV = raft::matrix::l2_norm(handle, make_device_matrix_view<const math_t>(V, n, k));
+  math_t normP =
+    raft::matrix::l2_norm(handle, make_device_matrix_view<const math_t>(P_d.data(), m, n));
 
   // calculate percent error
   const math_t alpha = 1.0, beta = -1.0;
@@ -315,8 +323,9 @@ bool evaluateSVDByL2Norm(raft::device_resources const& handle,
                              m,
                              stream));
 
-  math_t norm_A_minus_P = raft::matrix::getL2Norm(handle, A_minus_P.data(), m * n, stream);
-  math_t percent_error  = 100.0 * norm_A_minus_P / normA;
+  math_t norm_A_minus_P =
+    raft::matrix::l2_norm(handle, make_device_matrix_view<const math_t>(A_minus_P.data(), m, n));
+  math_t percent_error = 100.0 * norm_A_minus_P / normA;
   return (percent_error / 100.0 < tol);
 }
 
diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh
index 6c65626ac5..e8833a2779 100644
--- a/cpp/include/raft/linalg/matrix_vector_op.cuh
+++ b/cpp/include/raft/linalg/matrix_vector_op.cuh
@@ -22,6 +22,7 @@
 #include "linalg_types.hpp"
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/util/input_validation.hpp>
 
 namespace raft {
@@ -241,4 +242,4 @@ void matrix_vector_op(raft::device_resources const& handle,
 };         // end namespace linalg
 };         // end namespace raft
 
-#endif
\ No newline at end of file
+#endif
diff --git a/cpp/include/raft/matrix/copy.cuh b/cpp/include/raft/matrix/copy.cuh
index 42d2562e5e..e4e5526e71 100644
--- a/cpp/include/raft/matrix/copy.cuh
+++ b/cpp/include/raft/matrix/copy.cuh
@@ -42,7 +42,7 @@ template <typename m_t, typename idx_t, typename layout>
 void copy_rows(raft::device_resources const& handle,
                raft::device_matrix_view<const m_t, idx_t, layout> in,
                raft::device_matrix_view<m_t, idx_t, layout> out,
-               raft::device_vector_view<idx_t, idx_t> indices)
+               raft::device_vector_view<const idx_t, idx_t> indices)
 {
   RAFT_EXPECTS(in.extent(1) == out.extent(1),
                "Input and output matrices must have same number of columns");
@@ -58,6 +58,24 @@ void copy_rows(raft::device_resources const& handle,
                    raft::is_row_major(in));
 }
 
+/**
+ * @brief copy matrix operation for row major matrices.
+ * @param[in] handle: raft handle
+ * @param[in] in: input matrix
+ * @param[out] out: output matrix
+ */
+template <typename m_t, typename matrix_idx_t>
+void copy(raft::device_resources const& handle,
+          raft::device_matrix_view<const m_t, matrix_idx_t, row_major> in,
+          raft::device_matrix_view<m_t, matrix_idx_t, row_major> out)
+{
+  RAFT_EXPECTS(in.extent(0) == out.extent(0) && in.extent(1) == out.extent(1),
+               "Input and output matrix shapes must match.");
+
+  raft::copy_async(
+    out.data_handle(), in.data_handle(), in.extent(0) * out.extent(1), handle.get_stream());
+}
+
 /**
  * @brief copy matrix operation for column major matrices.
  * @param[in] handle: raft handle
diff --git a/cpp/include/raft/matrix/diagonal.cuh b/cpp/include/raft/matrix/diagonal.cuh
index 22147e9f34..c7a3681983 100644
--- a/cpp/include/raft/matrix/diagonal.cuh
+++ b/cpp/include/raft/matrix/diagonal.cuh
@@ -17,8 +17,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
-#include <raft/matrix/matrix.cuh>
 
 namespace raft::matrix {
 
@@ -34,7 +34,7 @@ namespace raft::matrix {
  * @param[out] matrix: matrix of size n_rows x n_cols
  */
 template <typename m_t, typename idx_t, typename layout>
-void set_diagonal(raft::device_resources const& handle,
+void set_diagonal(raft::resources const& handle,
                   raft::device_vector_view<const m_t, idx_t> vec,
                   raft::device_matrix_view<m_t, idx_t, layout> matrix)
 {
@@ -45,7 +45,7 @@ void set_diagonal(raft::device_resources const& handle,
                                    matrix.data_handle(),
                                    matrix.extent(0),
                                    matrix.extent(1),
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 }
 
 /**
@@ -55,7 +55,7 @@ void set_diagonal(raft::device_resources const& handle,
  * @param[out] vec: vector of length k = min(n_rows, n_cols)
  */
 template <typename m_t, typename idx_t, typename layout>
-void get_diagonal(raft::device_resources const& handle,
+void get_diagonal(raft::resources const& handle,
                   raft::device_matrix_view<const m_t, idx_t, layout> matrix,
                   raft::device_vector_view<m_t, idx_t> vec)
 {
@@ -65,7 +65,7 @@ void get_diagonal(raft::device_resources const& handle,
                             matrix.data_handle(),
                             matrix.extent(0),
                             matrix.extent(1),
-                            handle.get_stream());
+                            resource::get_cuda_stream(handle));
 }
 
 /**
@@ -74,14 +74,15 @@ void get_diagonal(raft::device_resources const& handle,
  * @param[inout] inout: square input matrix with size len x len
  */
 template <typename m_t, typename idx_t, typename layout>
-void invert_diagonal(raft::device_resources const& handle,
+void invert_diagonal(raft::resources const& handle,
                      raft::device_matrix_view<m_t, idx_t, layout> inout)
 {
   // TODO: Use get_diagonal for this to support rectangular
   RAFT_EXPECTS(inout.extent(0) == inout.extent(1), "Matrix must be square.");
-  detail::getDiagonalInverseMatrix(inout.data_handle(), inout.extent(0), handle.get_stream());
+  detail::getDiagonalInverseMatrix(
+    inout.data_handle(), inout.extent(0), resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group matrix_diagonal
 
-}  // namespace raft::matrix
\ No newline at end of file
+}  // namespace raft::matrix
diff --git a/cpp/include/raft/matrix/init.cuh b/cpp/include/raft/matrix/init.cuh
index ed2fb4d209..9611e044f4 100644
--- a/cpp/include/raft/matrix/init.cuh
+++ b/cpp/include/raft/matrix/init.cuh
@@ -20,7 +20,6 @@
 #include <raft/core/host_mdspan.hpp>
 #include <raft/linalg/map.cuh>
 #include <raft/matrix/detail/math.cuh>
-#include <raft/matrix/matrix.cuh>
 
 namespace raft::matrix {
 
diff --git a/cpp/include/raft/matrix/linewise_op.cuh b/cpp/include/raft/matrix/linewise_op.cuh
index 33de112a35..f8e3555d9d 100644
--- a/cpp/include/raft/matrix/linewise_op.cuh
+++ b/cpp/include/raft/matrix/linewise_op.cuh
@@ -17,8 +17,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/matrix/detail/matrix.cuh>
-#include <raft/matrix/matrix.cuh>
+#include <raft/core/device_resources.hpp>
+#include <raft/matrix/detail/linewise_op.cuh>
 
 namespace raft::matrix {
 
diff --git a/cpp/include/raft/matrix/print.cuh b/cpp/include/raft/matrix/print.cuh
index 6a4bfbdd01..f2c2653211 100644
--- a/cpp/include/raft/matrix/print.cuh
+++ b/cpp/include/raft/matrix/print.cuh
@@ -19,7 +19,6 @@
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
 #include <raft/matrix/detail/matrix.cuh>
-#include <raft/matrix/matrix.cuh>
 #include <raft/matrix/matrix_types.hpp>
 
 namespace raft::matrix {
diff --git a/cpp/include/raft/matrix/sign_flip.cuh b/cpp/include/raft/matrix/sign_flip.cuh
index d069c55880..93962fb67d 100644
--- a/cpp/include/raft/matrix/sign_flip.cuh
+++ b/cpp/include/raft/matrix/sign_flip.cuh
@@ -18,7 +18,6 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/matrix/detail/matrix.cuh>
-#include <raft/matrix/matrix.cuh>
 
 namespace raft::matrix {
 
diff --git a/cpp/include/raft/matrix/slice.cuh b/cpp/include/raft/matrix/slice.cuh
index bb92b2b86f..071a10a847 100644
--- a/cpp/include/raft/matrix/slice.cuh
+++ b/cpp/include/raft/matrix/slice.cuh
@@ -76,4 +76,4 @@ void slice(raft::device_resources const& handle,
 
 /** @} */  // end group matrix_slice
 
-}  // namespace raft::matrix
\ No newline at end of file
+}  // namespace raft::matrix
diff --git a/cpp/include/raft/matrix/sqrt.cuh b/cpp/include/raft/matrix/sqrt.cuh
index 9729f9b3d5..309ae3452f 100644
--- a/cpp/include/raft/matrix/sqrt.cuh
+++ b/cpp/include/raft/matrix/sqrt.cuh
@@ -19,7 +19,6 @@
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
 #include <raft/matrix/detail/matrix.cuh>
-#include <raft/matrix/matrix.cuh>
 
 namespace raft::matrix {
 
diff --git a/cpp/include/raft/matrix/triangular.cuh b/cpp/include/raft/matrix/triangular.cuh
index 3c60cc362f..0c89140046 100644
--- a/cpp/include/raft/matrix/triangular.cuh
+++ b/cpp/include/raft/matrix/triangular.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 
 namespace raft::matrix {
@@ -33,17 +34,19 @@ namespace raft::matrix {
  * @param[out] dst: output matrix with a size of kxk, k = min(n_rows, n_cols)
  */
 template <typename m_t, typename idx_t>
-void upper_triangular(raft::device_resources const& handle,
+void upper_triangular(raft::resources const& handle,
                       raft::device_matrix_view<const m_t, idx_t, col_major> src,
                       raft::device_matrix_view<m_t, idx_t, col_major> dst)
 {
   auto k = std::min(src.extent(0), src.extent(1));
   RAFT_EXPECTS(k == dst.extent(0) && k == dst.extent(1),
                "dst should be of size kxk, k = min(n_rows, n_cols)");
-  detail::copyUpperTriangular(
-    src.data_handle(), dst.data_handle(), src.extent(0), src.extent(1), handle.get_stream());
+  detail::copyUpperTriangular(src.data_handle(),
+                              dst.data_handle(),
+                              src.extent(0),
+                              src.extent(1),
+                              resource::get_cuda_stream(handle));
 }
-
 /** @} */  // end group matrix_triangular
 
-}  // namespace raft::matrix
\ No newline at end of file
+}  // namespace raft::matrix
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
index b17b3a3559..53d8823eea 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
@@ -181,8 +181,12 @@ void select_residuals(raft::device_resources const& handle,
     dataset, utils::mapping<float>{});
   raft::matrix::gather(mapping_itr, (IdxT)dim, n_rows, row_ids, n_rows, tmp.data(), stream);
 
-  raft::matrix::linewiseOp(
-    tmp.data(), tmp.data(), IdxT(dim), n_rows, true, raft::sub_op{}, stream, center);
+  raft::matrix::linewise_op(handle,
+                            make_device_matrix_view<const T, IdxT>(tmp.data(), n_rows, dim),
+                            make_device_matrix_view<T, IdxT>(tmp.data(), n_rows, dim),
+                            true,
+                            raft::sub_op{},
+                            make_device_vector_view<const T, IdxT>(center, dim));
 
   float alpha = 1.0;
   float beta  = 0.0;
diff --git a/cpp/include/raft/neighbors/refine-inl.cuh b/cpp/include/raft/neighbors/refine-inl.cuh
index 4243d7e723..2c4dfb422e 100644
--- a/cpp/include/raft/neighbors/refine-inl.cuh
+++ b/cpp/include/raft/neighbors/refine-inl.cuh
@@ -19,7 +19,6 @@
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
-#include <raft/matrix/matrix.cuh>
 #include <raft/neighbors/detail/refine.cuh>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh
index 1715dcbe81..aec1a15f84 100644
--- a/cpp/include/raft/random/detail/make_regression.cuh
+++ b/cpp/include/raft/random/detail/make_regression.cuh
@@ -29,7 +29,7 @@
 #include <raft/linalg/init.cuh>
 #include <raft/linalg/qr.cuh>
 #include <raft/linalg/transpose.cuh>
-#include <raft/matrix/matrix.cuh>
+#include <raft/matrix/diagonal.cuh>
 #include <raft/random/permute.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -83,7 +83,10 @@ static void _make_low_rank_matrix(raft::resources const& handle,
   RAFT_CUDA_TRY(cudaPeekAtLastError());
   rmm::device_uvector<DataT> singular_mat(n * n, stream);
   RAFT_CUDA_TRY(cudaMemsetAsync(singular_mat.data(), 0, n * n * sizeof(DataT), stream));
-  raft::matrix::initializeDiagonalMatrix(singular_vec.data(), singular_mat.data(), n, n, stream);
+
+  raft::matrix::set_diagonal(handle,
+                             make_device_vector_view<const DataT, IdxT>(singular_vec.data(), n),
+                             make_device_matrix_view<DataT, IdxT>(singular_mat.data(), n, n));
 
   // Generate the column-major matrix
   rmm::device_uvector<DataT> temp_q0s(n_rows * n, stream);
diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
index 6649c10c47..527fc14208 100644
--- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh
+++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
@@ -20,7 +20,6 @@
 
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/unary_op.cuh>
-#include <raft/matrix/matrix.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh
index c8fc6eefda..a58847ee41 100644
--- a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh
@@ -30,7 +30,7 @@
 
 #include <raft/neighbors/detail/faiss_select/key_value_block_select.cuh>
 
-#include <raft/matrix/matrix.cuh>
+#include <raft/matrix/copy.cuh>
 #include <raft/neighbors/brute_force.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/sparse/convert/csr.cuh>
@@ -94,14 +94,16 @@ void sample_landmarks(raft::device_resources const& handle,
                                          (value_idx)index.n_landmarks,
                                          (value_idx)index.m);
 
-  raft::matrix::copyRows<value_t, value_idx, size_t>(index.get_X().data_handle(),
-                                                     index.m,
-                                                     index.n,
-                                                     index.get_R().data_handle(),
-                                                     R_1nn_cols2.data(),
-                                                     index.n_landmarks,
-                                                     handle.get_stream(),
-                                                     true);
+  // index.get_X() returns the wrong indextype (uint32_t where we need value_idx), so need to
+  // create new device_matrix_view here
+  auto x = index.get_X();
+  auto r = index.get_R();
+
+  raft::matrix::copy_rows<value_t, value_idx>(
+    handle,
+    make_device_matrix_view<const value_t, value_idx>(x.data_handle(), x.extent(0), x.extent(1)),
+    make_device_matrix_view<value_t, value_idx>(r.data_handle(), r.extent(0), r.extent(1)),
+    make_device_vector_view(R_1nn_cols2.data(), index.n_landmarks));
 }
 
 /**
diff --git a/cpp/test/linalg/svd.cu b/cpp/test/linalg/svd.cu
index c780476a5f..9907172956 100644
--- a/cpp/test/linalg/svd.cu
+++ b/cpp/test/linalg/svd.cu
@@ -18,7 +18,6 @@
 #include <gtest/gtest.h>
 #include <raft/linalg/init.cuh>
 #include <raft/linalg/svd.cuh>
-#include <raft/matrix/matrix.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
diff --git a/cpp/test/matrix/matrix.cu b/cpp/test/matrix/matrix.cu
index 10105203f7..07ab3c5ce4 100644
--- a/cpp/test/matrix/matrix.cu
+++ b/cpp/test/matrix/matrix.cu
@@ -143,7 +143,7 @@ class MatrixCopyRowsTest : public ::testing::Test {
       output.data(), n_selected, n_cols);
 
     auto indices_view =
-      raft::make_device_vector_view<idx_array_t, idx_array_t>(indices.data(), n_selected);
+      raft::make_device_vector_view<const idx_array_t, idx_array_t>(indices.data(), n_selected);
 
     raft::matrix::copy_rows(handle, input_view, output_view, indices_view);
 
diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh
index 438c56da21..67df5f2abe 100644
--- a/cpp/test/neighbors/ann_utils.cuh
+++ b/cpp/test/neighbors/ann_utils.cuh
@@ -18,8 +18,8 @@
 
 #include <raft/core/device_mdarray.hpp>  // raft::make_device_matrix
 #include <raft/distance/distance_types.hpp>
+#include <raft/matrix/copy.cuh>
 #include <raft/matrix/detail/select_k.cuh>
-#include <raft/matrix/matrix.cuh>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 #include <raft/util/cuda_utils.cuh>
 
@@ -188,8 +188,11 @@ auto eval_distances(raft::device_resources const& handle,
     auto y          = raft::make_device_matrix<T, IdxT>(handle, k, n_cols);
     auto naive_dist = raft::make_device_matrix<DistT, IdxT>(handle, 1, k);
 
-    raft::matrix::copyRows<T, IdxT, int64_t>(
-      x, k, n_cols, y.data_handle(), neighbors + i * k, k, handle.get_stream(), true);
+    raft::matrix::copy_rows<T, IdxT>(
+      handle,
+      make_device_matrix_view<const T, IdxT>(x, k, n_cols),
+      y.view(),
+      make_device_vector_view<const IdxT, IdxT>(neighbors + i * k, k));
 
     dim3 block_dim(16, 32, 1);
     auto grid_y =

From 56815fbb1b3079d66785be26e4ab21d5072bb93f Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Fri, 12 May 2023 14:17:03 -0400
Subject: [PATCH 51/78] Adding bfknn and ivf-pq python api to docs (#1507)

Closes #1506. The IVF-PQ docs seem to be rendering fine for me locally. I'm still unsure why they aren't rendering in the nightlies.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Ben Frederickson (https://github.com/benfred)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1507
---
 dependencies.yaml                       |  1 +
 docs/source/pylibraft_api/neighbors.rst | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/dependencies.yaml b/dependencies.yaml
index ccaf3fe0d8..c768fe0333 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -46,6 +46,7 @@ files:
   docs:
     output: none
     includes:
+      - test_pylibraft
       - cudatoolkit
       - docs
       - py_version
diff --git a/docs/source/pylibraft_api/neighbors.rst b/docs/source/pylibraft_api/neighbors.rst
index 89bb577027..c314f1c84d 100644
--- a/docs/source/pylibraft_api/neighbors.rst
+++ b/docs/source/pylibraft_api/neighbors.rst
@@ -8,6 +8,28 @@ This page provides pylibraft class references for the publicly-exposed elements
    :class: highlight
 
 
+Brute Force
+###########
+
+.. autofunction:: pylibraft.neighbors.brute_force.knn
+
+
+IVF-Flat
+########
+
+.. autoclass:: pylibraft.neighbors.ivf_flat.IndexParams
+    :members:
+
+.. autofunction:: pylibraft.neighbors.ivf_flat.build
+
+.. autofunction:: pylibraft.neighbors.ivf_flat.extend
+
+.. autoclass:: pylibraft.neighbors.ivf_flat.SearchParams
+    :members:
+
+.. autofunction:: pylibraft.neighbors.ivf_flat.search
+
+
 IVF-PQ
 ######
 

From b32cd60ab16b664766b42fb87dbae93a4c9b49b9 Mon Sep 17 00:00:00 2001
From: tsuki <12711693+enp1s0@users.noreply.github.com>
Date: Sat, 13 May 2023 04:55:54 +0900
Subject: [PATCH 52/78] Use rmm allocator in CAGRA prune (#1503)

This PR modifies the CAGRA prune function to use the RMM allocator instead of a custom memory allocator. It also removes the multi-GPU functionality, which is not suitable under the [RAFT policy](https://docs.rapids.ai/api/raft/stable/developer_guide/#multi-gpu).

rel: #1461

Authors:
  - tsuki (https://github.com/enp1s0)
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1503
---
 .../neighbors/detail/cagra/graph_core.cuh     | 841 +++++++-----------
 1 file changed, 318 insertions(+), 523 deletions(-)

diff --git a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
index a08c83677b..b7fffb4eaa 100644
--- a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
@@ -35,20 +35,8 @@
 namespace raft::neighbors::experimental::cagra::detail {
 namespace graph {
 
-template <class T>
-__host__ __device__ float compute_norm2(const T* a,
-                                        const T* b,
-                                        const std::size_t dim,
-                                        const float scale)
-{
-  float sum = 0.f;
-  for (std::size_t j = 0; j < dim; j++) {
-    const auto diff = a[j] * scale - b[j] * scale;
-    sum += diff * diff;
-  }
-  return sum;
-}
-
+// unnamed namespace to avoid multiple definition error
+namespace {
 inline double cur_time(void)
 {
   struct timeval tv;
@@ -76,25 +64,18 @@ __device__ inline bool swap_if_needed(K& key1, K& key2, V& val1, V& val2, bool a
   return false;
 }
 
-template <class DATA_T, int blockDim_x, int numElementsPerThread>
-__global__ void kern_sort(
-  DATA_T** dataset,             // [num_gpus][dataset_chunk_size, dataset_dim]
-  uint32_t dataset_size,
-  uint32_t dataset_chunk_size,  // (*) num_gpus * dataset_chunk_size >= dataset_size
-  uint32_t dataset_dim,
-  float scale,
-  uint32_t** knn_graph,       // [num_gpus][graph_chunk_size, graph_degree]
-  uint32_t graph_size,
-  uint32_t graph_chunk_size,  // (*) num_gpus * graph_chunk_size >= graph_size
-  uint32_t graph_degree,
-  int dev_id)
+template <class DATA_T, class IdxT, int blockDim_x, int numElementsPerThread>
+__global__ void kern_sort(const DATA_T* const dataset,  // [dataset_chunk_size, dataset_dim]
+                          const IdxT dataset_size,
+                          const uint32_t dataset_dim,
+                          IdxT* const knn_graph,  // [graph_chunk_size, graph_degree]
+                          const uint32_t graph_size,
+                          const uint32_t graph_degree)
 {
   __shared__ float smem_keys[blockDim_x * numElementsPerThread];
-  __shared__ uint32_t smem_vals[blockDim_x * numElementsPerThread];
+  __shared__ IdxT smem_vals[blockDim_x * numElementsPerThread];
 
-  uint64_t srcNode     = blockIdx.x + ((uint64_t)graph_chunk_size * dev_id);
-  uint64_t srcNode_dev = srcNode / graph_chunk_size;
-  uint64_t srcNode_loc = srcNode % graph_chunk_size;
+  const IdxT srcNode = blockIdx.x;
   if (srcNode >= graph_size) { return; }
 
   const uint32_t num_warps = blockDim_x / 32;
@@ -103,14 +84,13 @@ __global__ void kern_sort(
 
   // Compute distance from a src node to its neighbors
   for (int k = warp_id; k < graph_degree; k += num_warps) {
-    uint64_t dstNode     = knn_graph[srcNode_dev][k + ((uint64_t)graph_degree * srcNode_loc)];
-    uint64_t dstNode_dev = dstNode / graph_chunk_size;
-    uint64_t dstNode_loc = dstNode % graph_chunk_size;
-    float dist           = 0.0;
+    const IdxT dstNode = knn_graph[k + ((uint64_t)graph_degree * srcNode)];
+    float dist         = 0.0;
     for (int d = lane_id; d < dataset_dim; d += 32) {
-      float diff =
-        (float)(dataset[srcNode_dev][d + ((uint64_t)dataset_dim * srcNode_loc)]) * scale -
-        (float)(dataset[dstNode_dev][d + ((uint64_t)dataset_dim * dstNode_loc)]) * scale;
+      float diff = spatial::knn::detail::utils::mapping<float>{}(
+                     dataset[d + ((uint64_t)dataset_dim * srcNode)]) -
+                   spatial::knn::detail::utils::mapping<float>{}(
+                     dataset[d + ((uint64_t)dataset_dim * dstNode)]);
       dist += diff * diff;
     }
     dist += __shfl_xor_sync(0xffffffff, dist, 1);
@@ -126,41 +106,41 @@ __global__ void kern_sort(
   __syncthreads();
 
   float my_keys[numElementsPerThread];
-  uint32_t my_vals[numElementsPerThread];
+  IdxT my_vals[numElementsPerThread];
   for (int i = 0; i < numElementsPerThread; i++) {
-    int k = i + (numElementsPerThread * threadIdx.x);
+    const int k = i + (numElementsPerThread * threadIdx.x);
     if (k < graph_degree) {
       my_keys[i] = smem_keys[k];
       my_vals[i] = smem_vals[k];
     } else {
       my_keys[i] = FLT_MAX;
-      my_vals[i] = 0xffffffffU;
+      my_vals[i] = ~static_cast<IdxT>(0);
     }
   }
   __syncthreads();
 
   // Sorting by thread
-  uint32_t mask  = 1;
-  bool ascending = ((threadIdx.x & mask) == 0);
+  uint32_t mask        = 1;
+  const bool ascending = ((threadIdx.x & mask) == 0);
   for (int j = 0; j < numElementsPerThread; j += 2) {
 #pragma unroll
     for (int i = 0; i < numElementsPerThread; i += 2) {
-      swap_if_needed<float, uint32_t>(
+      swap_if_needed<float, IdxT>(
         my_keys[i], my_keys[i + 1], my_vals[i], my_vals[i + 1], ascending);
     }
 #pragma unroll
     for (int i = 1; i < numElementsPerThread - 1; i += 2) {
-      swap_if_needed<float, uint32_t>(
+      swap_if_needed<float, IdxT>(
         my_keys[i], my_keys[i + 1], my_vals[i], my_vals[i + 1], ascending);
     }
   }
 
   // Bitonic Sorting
   while (mask < blockDim_x) {
-    uint32_t next_mask = mask << 1;
+    const uint32_t next_mask = mask << 1;
 
     for (uint32_t curr_mask = mask; curr_mask > 0; curr_mask >>= 1) {
-      bool ascending = ((threadIdx.x & curr_mask) == 0) == ((threadIdx.x & next_mask) == 0);
+      const bool ascending = ((threadIdx.x & curr_mask) == 0) == ((threadIdx.x & next_mask) == 0);
       if (mask >= 32) {
         // inter warp
         __syncthreads();
@@ -172,29 +152,29 @@ __global__ void kern_sort(
         __syncthreads();
 #pragma unroll
         for (int i = 0; i < numElementsPerThread; i++) {
-          float opp_key    = smem_keys[(threadIdx.x ^ curr_mask) + (blockDim_x * i)];
-          uint32_t opp_val = smem_vals[(threadIdx.x ^ curr_mask) + (blockDim_x * i)];
-          swap_if_needed<float, uint32_t>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
+          float opp_key = smem_keys[(threadIdx.x ^ curr_mask) + (blockDim_x * i)];
+          IdxT opp_val  = smem_vals[(threadIdx.x ^ curr_mask) + (blockDim_x * i)];
+          swap_if_needed<float, IdxT>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
         }
       } else {
 // intra warp
 #pragma unroll
         for (int i = 0; i < numElementsPerThread; i++) {
-          float opp_key    = __shfl_xor_sync(0xffffffff, my_keys[i], curr_mask);
-          uint32_t opp_val = __shfl_xor_sync(0xffffffff, my_vals[i], curr_mask);
-          swap_if_needed<float, uint32_t>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
+          float opp_key = __shfl_xor_sync(0xffffffff, my_keys[i], curr_mask);
+          IdxT opp_val  = __shfl_xor_sync(0xffffffff, my_vals[i], curr_mask);
+          swap_if_needed<float, IdxT>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
         }
       }
     }
 
-    bool ascending = ((threadIdx.x & next_mask) == 0);
+    const bool ascending = ((threadIdx.x & next_mask) == 0);
 #pragma unroll
     for (uint32_t curr_mask = numElementsPerThread / 2; curr_mask > 0; curr_mask >>= 1) {
 #pragma unroll
       for (int i = 0; i < numElementsPerThread; i++) {
         int j = i ^ curr_mask;
         if (i > j) continue;
-        swap_if_needed<float, uint32_t>(my_keys[i], my_keys[j], my_vals[i], my_vals[j], ascending);
+        swap_if_needed<float, IdxT>(my_keys[i], my_keys[j], my_vals[i], my_vals[j], ascending);
       }
     }
     mask = next_mask;
@@ -202,54 +182,47 @@ __global__ void kern_sort(
 
   // Update knn_graph
   for (int i = 0; i < numElementsPerThread; i++) {
-    int k = i + (numElementsPerThread * threadIdx.x);
+    const int k = i + (numElementsPerThread * threadIdx.x);
     if (k < graph_degree) {
-      knn_graph[srcNode_dev][k + ((uint64_t)graph_degree * srcNode_loc)] = my_vals[i];
+      knn_graph[k + (static_cast<uint64_t>(graph_degree) * srcNode)] = my_vals[i];
     }
   }
 }
 
-template <int MAX_DEGREE>
-__global__ void kern_prune(
-  uint32_t** knn_graph,       // [num_gpus][graph_chunk_size, graph_degree]
-  uint32_t graph_size,
-  uint32_t graph_chunk_size,  // (*) num_gpus * graph_chunk_size >= graph_size
-  uint32_t graph_degree,
-  uint32_t degree,
-  int dev_id,
-  uint32_t batch_size,
-  uint32_t batch_id,
-  uint8_t** detour_count,          // [num_gpus][graph_chunk_size, graph_degree]
-  uint32_t** num_no_detour_edges,  // [num_gpus][graph_size]
-  uint64_t* stats)
+template <int MAX_DEGREE, class IdxT>
+__global__ void kern_prune(const IdxT* const knn_graph,  // [graph_chunk_size, graph_degree]
+                           const uint32_t graph_size,
+                           const uint32_t graph_degree,
+                           const uint32_t degree,
+                           const uint32_t batch_size,
+                           const uint32_t batch_id,
+                           uint8_t* const detour_count,          // [graph_chunk_size, graph_degree]
+                           uint32_t* const num_no_detour_edges,  // [graph_size]
+                           uint64_t* const stats)
 {
   __shared__ uint32_t smem_num_detour[MAX_DEGREE];
-  uint64_t* num_retain = stats;
-  uint64_t* num_full   = stats + 1;
+  uint64_t* const num_retain = stats;
+  uint64_t* const num_full   = stats + 1;
 
-  uint64_t nid = blockIdx.x + (batch_size * batch_id);
-  if (nid >= graph_chunk_size) { return; }
+  const uint64_t nid = blockIdx.x + (batch_size * batch_id);
+  if (nid >= graph_size) { return; }
   for (uint32_t k = threadIdx.x; k < graph_degree; k += blockDim.x) {
     smem_num_detour[k] = 0;
   }
   __syncthreads();
 
-  uint64_t iA     = nid + ((uint64_t)graph_chunk_size * dev_id);
-  uint64_t iA_dev = iA / graph_chunk_size;
-  uint64_t iA_loc = iA % graph_chunk_size;
+  const uint64_t iA = nid;
   if (iA >= graph_size) { return; }
 
   // count number of detours (A->D->B)
   for (uint32_t kAD = 0; kAD < graph_degree - 1; kAD++) {
-    uint64_t iD     = knn_graph[iA_dev][kAD + (graph_degree * iA_loc)];
-    uint64_t iD_dev = iD / graph_chunk_size;
-    uint64_t iD_loc = iD % graph_chunk_size;
+    const uint64_t iD = knn_graph[kAD + (graph_degree * iA)];
     for (uint32_t kDB = threadIdx.x; kDB < graph_degree; kDB += blockDim.x) {
-      uint64_t iB_candidate = knn_graph[iD_dev][kDB + ((uint64_t)graph_degree * iD_loc)];
+      const uint64_t iB_candidate = knn_graph[kDB + ((uint64_t)graph_degree * iD)];
       for (uint32_t kAB = kAD + 1; kAB < graph_degree; kAB++) {
         // if ( kDB < kAB )
         {
-          uint64_t iB = knn_graph[iA_dev][kAB + (graph_degree * iA_loc)];
+          const uint64_t iB = knn_graph[kAB + (graph_degree * iA)];
           if (iB == iB_candidate) {
             atomicAdd(smem_num_detour + kAB, 1);
             break;
@@ -262,7 +235,7 @@ __global__ void kern_prune(
 
   uint32_t num_edges_no_detour = 0;
   for (uint32_t k = threadIdx.x; k < graph_degree; k += blockDim.x) {
-    detour_count[iA_dev][k + (graph_degree * iA_loc)] = min(smem_num_detour[k], (uint32_t)255);
+    detour_count[k + (graph_degree * iA)] = min(smem_num_detour[k], (uint32_t)255);
     if (smem_num_detour[k] == 0) { num_edges_no_detour++; }
   }
   num_edges_no_detour += __shfl_xor_sync(0xffffffff, num_edges_no_detour, 1);
@@ -273,119 +246,29 @@ __global__ void kern_prune(
   num_edges_no_detour = min(num_edges_no_detour, degree);
 
   if (threadIdx.x == 0) {
-    num_no_detour_edges[iA_dev][iA_loc] = num_edges_no_detour;
+    num_no_detour_edges[iA] = num_edges_no_detour;
     atomicAdd((unsigned long long int*)num_retain, (unsigned long long int)num_edges_no_detour);
     if (num_edges_no_detour >= degree) { atomicAdd((unsigned long long int*)num_full, 1); }
   }
 }
 
-// unnamed namespace to avoid multiple definition error
-namespace {
-__global__ void kern_make_rev_graph(const uint32_t i_gpu,
-                                    const uint32_t* dest_nodes,  // [global_graph_size]
-                                    const uint32_t global_graph_size,
-                                    uint32_t* rev_graph,         // [graph_size, degree]
-                                    uint32_t* rev_graph_count,   // [graph_size]
+template <class IdxT>
+__global__ void kern_make_rev_graph(const IdxT* const dest_nodes,     // [graph_size]
+                                    IdxT* const rev_graph,            // [size, degree]
+                                    uint32_t* const rev_graph_count,  // [graph_size]
                                     const uint32_t graph_size,
                                     const uint32_t degree)
 {
   const uint32_t tid  = threadIdx.x + (blockDim.x * blockIdx.x);
   const uint32_t tnum = blockDim.x * gridDim.x;
 
-  for (uint32_t gl_src_id = tid; gl_src_id < global_graph_size; gl_src_id += tnum) {
-    uint32_t gl_dest_id = dest_nodes[gl_src_id];
-    if (gl_dest_id < graph_size * i_gpu) continue;
-    if (gl_dest_id >= graph_size * (i_gpu + 1)) continue;
-    if (gl_dest_id >= global_graph_size) continue;
-
-    uint32_t dest_id = gl_dest_id - (graph_size * i_gpu);
-    uint32_t pos     = atomicAdd(rev_graph_count + dest_id, 1);
-    if (pos < degree) { rev_graph[pos + ((uint64_t)degree * dest_id)] = gl_src_id; }
-  }
-}
-}  // namespace
-template <class T>
-T*** mgpu_alloc(int n_gpus, uint32_t chunk, uint32_t nelems)
-{
-  T** arrays;                                      // [n_gpus][chunk, nelems]
-  arrays       = (T**)malloc(sizeof(T*) * n_gpus); /* h1 */
-  size_t bsize = sizeof(T) * chunk * nelems;
-  // RAFT_LOG_DEBUG("[%s, %s, %d] n_gpus: %d, chunk: %u, nelems: %u, bsize: %lu (%lu MiB)\n",
-  //         __FILE__, __func__, __LINE__, n_gpus, chunk, nelems, bsize, bsize / 1024 / 1024);
-  for (int i_gpu = 0; i_gpu < n_gpus; i_gpu++) {
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    RAFT_CUDA_TRY(cudaMalloc(&(arrays[i_gpu]), bsize)); /* d1 */
-  }
-  T*** d_arrays;                                        // [n_gpus+1][n_gpus][chunk, nelems]
-  d_arrays = (T***)malloc(sizeof(T**) * (n_gpus + 1));  /* h2 */
-  bsize    = sizeof(T*) * n_gpus;
-  for (int i_gpu = 0; i_gpu < n_gpus; i_gpu++) {
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    RAFT_CUDA_TRY(cudaMalloc(&(d_arrays[i_gpu]), bsize)); /* d2 */
-    RAFT_CUDA_TRY(cudaMemcpy(d_arrays[i_gpu], arrays, bsize, cudaMemcpyDefault));
-  }
-  RAFT_CUDA_TRY(cudaSetDevice(0));
-  d_arrays[n_gpus] = arrays;
-  return d_arrays;
-}
-
-template <class T>
-void mgpu_free(T*** d_arrays, int n_gpus)
-{
-  for (int i_gpu = 0; i_gpu < n_gpus; i_gpu++) {
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    RAFT_CUDA_TRY(cudaFree(d_arrays[n_gpus][i_gpu])); /* d1 */
-    RAFT_CUDA_TRY(cudaFree(d_arrays[i_gpu]));         /* d2 */
-  }
-  RAFT_CUDA_TRY(cudaSetDevice(0));
-  free(d_arrays[n_gpus]); /* h1 */
-  free(d_arrays);         /* h2 */
-}
-
-template <class T>
-void mgpu_H2D(T*** d_arrays,     // [n_gpus+1][n_gpus][chunk, nelems]
-              const T* h_array,  // [size, nelems]
-              int n_gpus,
-              uint32_t size,
-              uint32_t chunk,  // (*) n_gpus * chunk >= size
-              uint32_t nelems)
-{
-#pragma omp parallel num_threads(n_gpus)
-  {
-    int i_gpu = omp_get_thread_num();
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    uint32_t _chunk = std::min(size - (chunk * i_gpu), chunk);
-    size_t bsize    = sizeof(T) * _chunk * nelems;
-    RAFT_CUDA_TRY(cudaMemcpy(d_arrays[n_gpus][i_gpu],
-                             h_array + ((uint64_t)chunk * nelems * i_gpu),
-                             bsize,
-                             cudaMemcpyDefault));
-  }
-  RAFT_CUDA_TRY(cudaDeviceSynchronize());
-  RAFT_CUDA_TRY(cudaSetDevice(0));
-}
+  for (uint32_t src_id = tid; src_id < graph_size; src_id += tnum) {
+    const IdxT dest_id = dest_nodes[src_id];
+    if (dest_id >= graph_size) continue;
 
-template <class T>
-void mgpu_D2H(T*** d_arrays,  // [n_gpus+1][n_gpus][chunk, nelems]
-              T* h_array,     // [size, nelems]
-              int n_gpus,
-              uint32_t size,
-              uint32_t chunk,  // (*) n_gpus * chunk >= size
-              uint32_t nelems)
-{
-#pragma omp parallel num_threads(n_gpus)
-  {
-    int i_gpu = omp_get_thread_num();
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    uint32_t _chunk = std::min(size - (chunk * i_gpu), chunk);
-    size_t bsize    = sizeof(T) * _chunk * nelems;
-    RAFT_CUDA_TRY(cudaMemcpy(h_array + ((uint64_t)chunk * nelems * i_gpu),
-                             d_arrays[n_gpus][i_gpu],
-                             bsize,
-                             cudaMemcpyDefault));
+    const uint32_t pos = atomicAdd(rev_graph_count + dest_id, 1);
+    if (pos < degree) { rev_graph[pos + ((uint64_t)degree * dest_id)] = src_id; }
   }
-  RAFT_CUDA_TRY(cudaDeviceSynchronize());
-  RAFT_CUDA_TRY(cudaSetDevice(0));
 }
 
 template <class T>
@@ -404,6 +287,7 @@ void shift_array(T* array, uint64_t num)
     array[i] = array[i - 1];
   }
 }
+}  // namespace
 
 template <typename DataT,
           typename IdxT = uint32_t,
@@ -421,107 +305,74 @@ void sort_knn_graph(raft::device_resources const& res,
   const uint32_t dataset_dim  = dataset.extent(1);
   const DataT* dataset_ptr    = dataset.data_handle();
 
+  const IdxT graph_size             = dataset_size;
   const uint32_t input_graph_degree = knn_graph.extent(1);
-  uint32_t* input_graph_ptr         = (uint32_t*)knn_graph.data_handle();
-
-  // Setup GPUs
-  int num_gpus = 0;
-
-  // Setup GPUs
-  RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus));
-  RAFT_LOG_DEBUG("# num_gpus: %d\n", num_gpus);
-  for (int self = 0; self < num_gpus; self++) {
-    RAFT_CUDA_TRY(cudaSetDevice(self));
-    for (int peer = 0; peer < num_gpus; peer++) {
-      if (self == peer) { continue; }
-      RAFT_CUDA_TRY(cudaDeviceEnablePeerAccess(peer, 0));
-    }
-  }
-  RAFT_CUDA_TRY(cudaSetDevice(0));
+  IdxT* const input_graph_ptr       = knn_graph.data_handle();
 
-  const uint32_t graph_size       = knn_graph.extent(0);
-  uint32_t*** d_input_graph_ptr   = NULL;  // [...][num_gpus][graph_chunk_size, input_graph_degree]
-  const uint32_t graph_chunk_size = (graph_size + num_gpus - 1) / num_gpus;
-  d_input_graph_ptr = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, input_graph_degree);
+  auto d_input_graph = raft::make_device_matrix<IdxT, IdxT>(res, graph_size, input_graph_degree);
 
-  DataT*** d_dataset_ptr            = NULL;  // [num_gpus+1][...][...]
-  const uint32_t dataset_chunk_size = (dataset_size + num_gpus - 1) / num_gpus;
-  assert(dataset_chunk_size == graph_chunk_size);
-  d_dataset_ptr = mgpu_alloc<DataT>(num_gpus, dataset_chunk_size, dataset_dim);
+  //
+  // Sorting kNN graph
+  //
+  const double time_sort_start = cur_time();
+  RAFT_LOG_DEBUG("# Sorting kNN Graph on GPUs ");
 
-  const float scale = 1.0f / raft::spatial::knn::detail::utils::config<DataT>::kDivisor;
+  auto d_dataset = raft::make_device_matrix<DataT, IdxT>(res, dataset_size, dataset_dim);
+  raft::copy(d_dataset.data_handle(), dataset_ptr, dataset_size * dataset_dim, res.get_stream());
 
-  mgpu_H2D<DataT>(
-    d_dataset_ptr, dataset_ptr, num_gpus, dataset_size, dataset_chunk_size, dataset_dim);
+  raft::copy(d_input_graph.data_handle(),
+             input_graph_ptr,
+             graph_size * input_graph_degree,
+             res.get_stream());
 
-  double time_sort_start = cur_time();
-  RAFT_LOG_DEBUG("# Sorting kNN Graph on GPUs ");
-  mgpu_H2D<uint32_t>(d_input_graph_ptr,
-                     input_graph_ptr,
-                     num_gpus,
-                     dataset_size,
-                     graph_chunk_size,
-                     input_graph_degree);
   void (*kernel_sort)(
-    DataT**, uint32_t, uint32_t, uint32_t, float, uint32_t**, uint32_t, uint32_t, uint32_t, int);
+    const DataT* const, const IdxT, const uint32_t, IdxT* const, const uint32_t, const uint32_t);
   constexpr int numElementsPerThread = 4;
   dim3 threads_sort(1, 1, 1);
   if (input_graph_degree <= numElementsPerThread * 32) {
     constexpr int blockDim_x = 32;
-    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, IdxT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else if (input_graph_degree <= numElementsPerThread * 64) {
     constexpr int blockDim_x = 64;
-    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, IdxT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else if (input_graph_degree <= numElementsPerThread * 128) {
     constexpr int blockDim_x = 128;
-    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, IdxT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else if (input_graph_degree <= numElementsPerThread * 256) {
     constexpr int blockDim_x = 256;
-    kernel_sort              = kern_sort<DataT, blockDim_x, numElementsPerThread>;
+    kernel_sort              = kern_sort<DataT, IdxT, blockDim_x, numElementsPerThread>;
     threads_sort.x           = blockDim_x;
   } else {
-    fprintf(stderr,
-            "[ERROR] The degree of input knn graph is too large (%u). "
-            "It must be equal to or small than %d.\n",
-            input_graph_degree,
-            numElementsPerThread * 256);
+    RAFT_LOG_ERROR(
+      "[ERROR] The degree of input knn graph is too large (%u). "
+      "It must be equal to or small than %d.\n",
+      input_graph_degree,
+      numElementsPerThread * 256);
     exit(-1);
   }
-  dim3 blocks_sort(graph_chunk_size, 1, 1);
-  for (int i_gpu = 0; i_gpu < num_gpus; i_gpu++) {
-    RAFT_LOG_DEBUG(".");
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    kernel_sort<<<blocks_sort, threads_sort>>>(d_dataset_ptr[i_gpu],
-                                               dataset_size,
-                                               dataset_chunk_size,
-                                               dataset_dim,
-                                               scale,
-                                               d_input_graph_ptr[i_gpu],
-                                               dataset_size,
-                                               graph_chunk_size,
-                                               input_graph_degree,
-                                               i_gpu);
-  }
-  RAFT_CUDA_TRY(cudaSetDevice(0));
-  RAFT_CUDA_TRY(cudaDeviceSynchronize());
+  dim3 blocks_sort(graph_size, 1, 1);
+  RAFT_LOG_DEBUG(".");
+  kernel_sort<<<blocks_sort, threads_sort, 0, res.get_stream()>>>(d_dataset.data_handle(),
+                                                                  dataset_size,
+                                                                  dataset_dim,
+                                                                  d_input_graph.data_handle(),
+                                                                  graph_size,
+                                                                  input_graph_degree);
+  res.sync_stream();
   RAFT_LOG_DEBUG(".");
-  mgpu_D2H<uint32_t>(d_input_graph_ptr,
-                     input_graph_ptr,
-                     num_gpus,
-                     dataset_size,
-                     graph_chunk_size,
-                     input_graph_degree);
+  raft::copy(input_graph_ptr,
+             d_input_graph.data_handle(),
+             graph_size * input_graph_degree,
+             res.get_stream());
   RAFT_LOG_DEBUG("\n");
-  double time_sort_end = cur_time();
-  RAFT_LOG_DEBUG("# Sorting kNN graph time: %.1lf sec\n", time_sort_end - time_sort_start);
 
-  mgpu_free<DataT>(d_dataset_ptr, num_gpus);
+  const double time_sort_end = cur_time();
+  RAFT_LOG_DEBUG("# Sorting kNN graph time: %.1lf sec\n", time_sort_end - time_sort_start);
 }
 
-/** Input arrays can be both host and device*/
 template <typename IdxT = uint32_t,
           typename g_accessor =
             host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
@@ -538,308 +389,252 @@ void prune(raft::device_resources const& res,
                "output graph cannot have more columns than input graph");
   const uint32_t input_graph_degree  = knn_graph.extent(1);
   const uint32_t output_graph_degree = new_graph.extent(1);
-  uint32_t* input_graph_ptr          = (uint32_t*)knn_graph.data_handle();
-  uint32_t* output_graph_ptr         = new_graph.data_handle();
-  const std::size_t graph_size       = new_graph.extent(0);
-  size_t array_size;
-
-  // Setup GPUs
-  int num_gpus = 0;
-
-  // Setup GPUs
-  RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus));
-  RAFT_LOG_DEBUG("# num_gpus: %d\n", num_gpus);
-  for (int self = 0; self < num_gpus; self++) {
-    RAFT_CUDA_TRY(cudaSetDevice(self));
-    for (int peer = 0; peer < num_gpus; peer++) {
-      if (self == peer) { continue; }
-      RAFT_CUDA_TRY(cudaDeviceEnablePeerAccess(peer, 0));
-    }
-  }
-  RAFT_CUDA_TRY(cudaSetDevice(0));
-
-  uint32_t graph_chunk_size     = graph_size;
-  uint32_t*** d_input_graph_ptr = NULL;  // [...][num_gpus][graph_chunk_size, input_graph_degree]
-  graph_chunk_size              = (graph_size + num_gpus - 1) / num_gpus;
-  d_input_graph_ptr = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, input_graph_degree);
-
-  //
-  uint8_t* detour_count;  // [graph_size, input_graph_degree]
-  array_size   = sizeof(uint8_t) * graph_size * input_graph_degree;
-  detour_count = (uint8_t*)malloc(array_size);
-  memset(detour_count, 0xff, array_size);
-
-  uint8_t*** d_detour_count = NULL;  // [...][num_gpus][graph_chunk_size, input_graph_degree]
-  d_detour_count            = mgpu_alloc<uint8_t>(num_gpus, graph_chunk_size, input_graph_degree);
-  mgpu_H2D<uint8_t>(
-    d_detour_count, detour_count, num_gpus, graph_size, graph_chunk_size, input_graph_degree);
-
-  //
-  uint32_t* num_no_detour_edges;  // [graph_size]
-  array_size          = sizeof(uint32_t) * graph_size;
-  num_no_detour_edges = (uint32_t*)malloc(array_size);
-  memset(num_no_detour_edges, 0, array_size);
+  auto input_graph_ptr               = knn_graph.data_handle();
+  auto output_graph_ptr              = new_graph.data_handle();
+  const IdxT graph_size              = new_graph.extent(0);
 
-  uint32_t*** d_num_no_detour_edges = NULL;  // [...][num_gpus][graph_chunk_size]
-  d_num_no_detour_edges             = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, 1);
-  mgpu_H2D<uint32_t>(
-    d_num_no_detour_edges, num_no_detour_edges, num_gpus, graph_size, graph_chunk_size, 1);
+  auto pruned_graph = raft::make_host_matrix<IdxT, IdxT>(graph_size, output_graph_degree);
 
-  //
-  uint64_t** dev_stats  = NULL;  // [num_gpus][2]
-  uint64_t** host_stats = NULL;  // [num_gpus][2]
-  dev_stats             = (uint64_t**)malloc(sizeof(uint64_t*) * num_gpus);
-  host_stats            = (uint64_t**)malloc(sizeof(uint64_t*) * num_gpus);
-  array_size            = sizeof(uint64_t) * 2;
-  for (int i_gpu = 0; i_gpu < num_gpus; i_gpu++) {
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    RAFT_CUDA_TRY(cudaMalloc(&(dev_stats[i_gpu]), array_size));
-    host_stats[i_gpu] = (uint64_t*)malloc(array_size);
-  }
-  RAFT_CUDA_TRY(cudaSetDevice(0));
+  {
+    //
+    // Prune kNN graph
+    //
+    auto d_input_graph = raft::make_device_matrix<IdxT, IdxT>(res, graph_size, input_graph_degree);
+
+    auto detour_count = raft::make_host_matrix<uint8_t, IdxT>(graph_size, input_graph_degree);
+    auto d_detour_count =
+      raft::make_device_matrix<uint8_t, IdxT>(res, graph_size, input_graph_degree);
+    RAFT_CUDA_TRY(cudaMemsetAsync(d_detour_count.data_handle(),
+                                  0xff,
+                                  graph_size * input_graph_degree * sizeof(uint8_t),
+                                  res.get_stream()));
+
+    auto d_num_no_detour_edges = raft::make_device_vector<uint32_t, IdxT>(res, graph_size);
+    RAFT_CUDA_TRY(cudaMemsetAsync(
+      d_num_no_detour_edges.data_handle(), 0x00, graph_size * sizeof(uint32_t), res.get_stream()));
+
+    auto dev_stats  = raft::make_device_vector<uint64_t>(res, 2);
+    auto host_stats = raft::make_host_vector<uint64_t>(2);
+
+    //
+    // Prune unimportant edges.
+    //
+    // The edge to be retained is determined without explicitly considering
+    // distance or angle. Suppose the edge is the k-th edge of some node-A to
+    // node-B (A->B). Among the edges originating at node-A, there are k-1 edges
+    // shorter than the edge A->B. Each of these k-1 edges is connected to a
+    // different node. Among these k-1 nodes, count the number of nodes with
+    // edges to node-B, which is the number of 2-hop detours for the edge A->B.
+    // Once the number of 2-hop detours has been counted for all edges, the
+    // specified number of edges are picked up for each node, starting with the
+    // edge with the lowest number of 2-hop detours.
+    //
+    const double time_prune_start = cur_time();
+    RAFT_LOG_DEBUG("# Pruning kNN Graph on GPUs\r");
+
+    raft::copy(d_input_graph.data_handle(),
+               input_graph_ptr,
+               graph_size * input_graph_degree,
+               res.get_stream());
+    void (*kernel_prune)(const IdxT* const,
+                         const uint32_t,
+                         const uint32_t,
+                         const uint32_t,
+                         const uint32_t,
+                         const uint32_t,
+                         uint8_t* const,
+                         uint32_t* const,
+                         uint64_t* const);
 
-  //
-  // Prune unimportant edges.
-  //
-  // The edge to be retained is determined without explicitly considering
-  // distance or angle. Suppose the edge is the k-th edge of some node-A to
-  // node-B (A->B). Among the edges originating at node-A, there are k-1 edges
-  // shorter than the edge A->B. Each of these k-1 edges are connected to a
-  // different k-1 nodes. Among these k-1 nodes, count the number of nodes with
-  // edges to node-B, which is the number of 2-hop detours for the edge A->B.
-  // Once the number of 2-hop detours has been counted for all edges, the
-  // specified number of edges are picked up for each node, starting with the
-  // edge with the lowest number of 2-hop detours.
-  //
-  double time_prune_start = cur_time();
-  uint64_t num_keep       = 0;
-  uint64_t num_full       = 0;
-  RAFT_LOG_DEBUG("# Pruning kNN Graph on GPUs\r");
-  mgpu_H2D<uint32_t>(
-    d_input_graph_ptr, input_graph_ptr, num_gpus, graph_size, graph_chunk_size, input_graph_degree);
-  void (*kernel_prune)(uint32_t**,
-                       uint32_t,
-                       uint32_t,
-                       uint32_t,
-                       uint32_t,
-                       int,
-                       uint32_t,
-                       uint32_t,
-                       uint8_t**,
-                       uint32_t**,
-                       uint64_t*);
-  if (input_graph_degree <= 1024) {
     constexpr int MAX_DEGREE = 1024;
-    kernel_prune             = kern_prune<MAX_DEGREE>;
-  } else {
-    fprintf(stderr,
-            "[ERROR] The degree of input knn graph is too large (%u). "
-            "It must be equal to or small than %d.\n",
-            input_graph_degree,
-            1024);
-    exit(-1);
-  }
-  uint32_t batch_size = std::min(graph_chunk_size, (uint32_t)256 * 1024);
-  uint32_t num_batch  = (graph_chunk_size + batch_size - 1) / batch_size;
-  dim3 threads_prune(32, 1, 1);
-  dim3 blocks_prune(batch_size, 1, 1);
-  for (int i_gpu = 0; i_gpu < num_gpus; i_gpu++) {
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    RAFT_CUDA_TRY(cudaMemset(dev_stats[i_gpu], 0, sizeof(uint64_t) * 2));
-  }
-  for (uint32_t i_batch = 0; i_batch < num_batch; i_batch++) {
-    for (int i_gpu = 0; i_gpu < num_gpus; i_gpu++) {
-      RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-      kernel_prune<<<blocks_prune, threads_prune>>>(d_input_graph_ptr[i_gpu],
-                                                    graph_size,
-                                                    graph_chunk_size,
-                                                    input_graph_degree,
-                                                    output_graph_degree,
-                                                    i_gpu,
-                                                    batch_size,
-                                                    i_batch,
-                                                    d_detour_count[i_gpu],
-                                                    d_num_no_detour_edges[i_gpu],
-                                                    dev_stats[i_gpu]);
+    if (input_graph_degree <= MAX_DEGREE) {
+      kernel_prune = kern_prune<MAX_DEGREE, IdxT>;
+    } else {
+      RAFT_LOG_ERROR(
+        "[ERROR] The degree of input knn graph is too large (%u). "
+        "It must be equal to or small than %d.\n",
+        input_graph_degree,
+        1024);
+      exit(-1);
     }
-    RAFT_CUDA_TRY(cudaDeviceSynchronize());
-    fprintf(
-      stderr,
-      "# Pruning kNN Graph on GPUs (%.1lf %%)\r",
-      (double)std::min((i_batch + 1) * batch_size, graph_chunk_size) / graph_chunk_size * 100);
-  }
-  for (int i_gpu = 0; i_gpu < num_gpus; i_gpu++) {
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
+    const uint32_t batch_size =
+      std::min(static_cast<uint32_t>(graph_size), static_cast<uint32_t>(256 * 1024));
+    const uint32_t num_batch = (graph_size + batch_size - 1) / batch_size;
+    const dim3 threads_prune(32, 1, 1);
+    const dim3 blocks_prune(batch_size, 1, 1);
+
     RAFT_CUDA_TRY(
-      cudaMemcpy(host_stats[i_gpu], dev_stats[i_gpu], sizeof(uint64_t) * 2, cudaMemcpyDefault));
-    num_keep += host_stats[i_gpu][0];
-    num_full += host_stats[i_gpu][1];
-  }
-  RAFT_CUDA_TRY(cudaDeviceSynchronize());
-  RAFT_CUDA_TRY(cudaSetDevice(0));
-  RAFT_LOG_DEBUG("\n");
+      cudaMemsetAsync(dev_stats.data_handle(), 0, sizeof(uint64_t) * 2, res.get_stream()));
+
+    for (uint32_t i_batch = 0; i_batch < num_batch; i_batch++) {
+      kernel_prune<<<blocks_prune, threads_prune, 0, res.get_stream()>>>(
+        d_input_graph.data_handle(),
+        graph_size,
+        input_graph_degree,
+        output_graph_degree,
+        batch_size,
+        i_batch,
+        d_detour_count.data_handle(),
+        d_num_no_detour_edges.data_handle(),
+        dev_stats.data_handle());
+      res.sync_stream();
+      RAFT_LOG_DEBUG(
+        "# Pruning kNN Graph on GPUs (%.1lf %%)\r",
+        (double)std::min<IdxT>((i_batch + 1) * batch_size, graph_size) / graph_size * 100);
+    }
+    res.sync_stream();
+    RAFT_LOG_DEBUG("\n");
 
-  mgpu_D2H<uint8_t>(
-    d_detour_count, detour_count, num_gpus, graph_size, graph_chunk_size, input_graph_degree);
-  mgpu_D2H<uint32_t>(
-    d_num_no_detour_edges, num_no_detour_edges, num_gpus, graph_size, graph_chunk_size, 1);
+    raft::copy(detour_count.data_handle(),
+               d_detour_count.data_handle(),
+               graph_size * input_graph_degree,
+               res.get_stream());
 
-  mgpu_free<uint32_t>(d_input_graph_ptr, num_gpus);
-  mgpu_free<uint8_t>(d_detour_count, num_gpus);
-  mgpu_free<uint32_t>(d_num_no_detour_edges, num_gpus);
+    raft::copy(host_stats.data_handle(), dev_stats.data_handle(), 2, res.get_stream());
+    const auto num_keep = host_stats.data_handle()[0];
+    const auto num_full = host_stats.data_handle()[1];
 
-  // Create pruned kNN graph
-  array_size                 = sizeof(uint32_t) * graph_size * output_graph_degree;
-  uint32_t* pruned_graph_ptr = (uint32_t*)malloc(array_size);
-  uint32_t max_detour        = 0;
+    // Create pruned kNN graph
+    uint32_t max_detour = 0;
 #pragma omp parallel for reduction(max : max_detour)
-  for (uint64_t i = 0; i < graph_size; i++) {
-    uint64_t pk = 0;
-    for (uint32_t num_detour = 0; num_detour < output_graph_degree; num_detour++) {
-      if (max_detour < num_detour) { max_detour = num_detour; /* stats */ }
-      for (uint64_t k = 0; k < input_graph_degree; k++) {
-        if (detour_count[k + (input_graph_degree * i)] != num_detour) { continue; }
-        pruned_graph_ptr[pk + (output_graph_degree * i)] =
-          input_graph_ptr[k + (input_graph_degree * i)];
-        pk += 1;
+    for (uint64_t i = 0; i < graph_size; i++) {
+      uint64_t pk = 0;
+      for (uint32_t num_detour = 0; num_detour < output_graph_degree; num_detour++) {
+        if (max_detour < num_detour) { max_detour = num_detour; /* stats */ }
+        for (uint64_t k = 0; k < input_graph_degree; k++) {
+          if (detour_count.data_handle()[k + (input_graph_degree * i)] != num_detour) { continue; }
+          pruned_graph.data_handle()[pk + (output_graph_degree * i)] =
+            input_graph_ptr[k + (input_graph_degree * i)];
+          pk += 1;
+          if (pk >= output_graph_degree) break;
+        }
         if (pk >= output_graph_degree) break;
       }
-      if (pk >= output_graph_degree) break;
+      assert(pk == output_graph_degree);
     }
-    assert(pk == output_graph_degree);
-  }
-  // RAFT_LOG_DEBUG("# max_detour: %u\n", max_detour);
-
-  double time_prune_end = cur_time();
-  fprintf(stderr,
-          "# Pruning time: %.1lf sec, "
-          "avg_no_detour_edges_per_node: %.2lf/%u, "
-          "nodes_with_no_detour_at_all_edges: %.1lf%%\n",
-          time_prune_end - time_prune_start,
-          (double)num_keep / graph_size,
-          output_graph_degree,
-          (double)num_full / graph_size * 100);
+    // RAFT_LOG_DEBUG("# max_detour: %u\n", max_detour);
 
-  //
-  // Make reverse graph
-  //
-  double time_make_start = cur_time();
-
-  array_size              = sizeof(uint32_t) * graph_size * output_graph_degree;
-  uint32_t* rev_graph_ptr = (uint32_t*)malloc(array_size);
-  memset(rev_graph_ptr, 0xff, array_size);
-
-  uint32_t*** d_rev_graph_ptr;  // [...][num_gpus][graph_chunk_size, output_graph_degree]
-  d_rev_graph_ptr = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, output_graph_degree);
-  mgpu_H2D<uint32_t>(
-    d_rev_graph_ptr, rev_graph_ptr, num_gpus, graph_size, graph_chunk_size, output_graph_degree);
-
-  array_size                = sizeof(uint32_t) * graph_size;
-  uint32_t* rev_graph_count = (uint32_t*)malloc(array_size);
-  memset(rev_graph_count, 0, array_size);
-
-  uint32_t*** d_rev_graph_count;  // [...][num_gpus][graph_chunk_size, 1]
-  d_rev_graph_count = mgpu_alloc<uint32_t>(num_gpus, graph_chunk_size, 1);
-  mgpu_H2D<uint32_t>(d_rev_graph_count, rev_graph_count, num_gpus, graph_size, graph_chunk_size, 1);
-
-  uint32_t* dest_nodes;     // [graph_size]
-  dest_nodes = (uint32_t*)malloc(sizeof(uint32_t) * graph_size);
-  uint32_t** d_dest_nodes;  // [num_gpus][graph_size]
-  d_dest_nodes = (uint32_t**)malloc(sizeof(uint32_t*) * num_gpus);
-  for (int i_gpu = 0; i_gpu < num_gpus; i_gpu++) {
-    RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-    RAFT_CUDA_TRY(cudaMalloc(&(d_dest_nodes[i_gpu]), sizeof(uint32_t) * graph_size));
+    const double time_prune_end = cur_time();
+    RAFT_LOG_DEBUG(
+      "# Pruning time: %.1lf sec, "
+      "avg_no_detour_edges_per_node: %.2lf/%u, "
+      "nodes_with_no_detour_at_all_edges: %.1lf%%\n",
+      time_prune_end - time_prune_start,
+      (double)num_keep / graph_size,
+      output_graph_degree,
+      (double)num_full / graph_size * 100);
   }
 
-  for (uint64_t k = 0; k < output_graph_degree; k++) {
+  auto rev_graph       = raft::make_host_matrix<IdxT, IdxT>(graph_size, output_graph_degree);
+  auto rev_graph_count = raft::make_host_vector<uint32_t, IdxT>(graph_size);
+
+  {
+    //
+    // Make reverse graph
+    //
+    const double time_make_start = cur_time();
+
+    auto d_rev_graph = raft::make_device_matrix<IdxT, IdxT>(res, graph_size, output_graph_degree);
+    RAFT_CUDA_TRY(cudaMemsetAsync(d_rev_graph.data_handle(),
+                                  0xff,
+                                  graph_size * output_graph_degree * sizeof(IdxT),
+                                  res.get_stream()));
+
+    auto d_rev_graph_count = raft::make_device_vector<uint32_t, IdxT>(res, graph_size);
+    RAFT_CUDA_TRY(cudaMemsetAsync(
+      d_rev_graph_count.data_handle(), 0x00, graph_size * sizeof(uint32_t), res.get_stream()));
+
+    auto dest_nodes   = raft::make_host_vector<IdxT, IdxT>(graph_size);
+    auto d_dest_nodes = raft::make_device_vector<IdxT, IdxT>(res, graph_size);
+
+    for (uint64_t k = 0; k < output_graph_degree; k++) {
 #pragma omp parallel for
-    for (uint64_t i = 0; i < graph_size; i++) {
-      dest_nodes[i] = pruned_graph_ptr[k + (output_graph_degree * i)];
-    }
-    RAFT_CUDA_TRY(cudaDeviceSynchronize());
-#pragma omp parallel num_threads(num_gpus)
-    {
-      int i_gpu = omp_get_thread_num();
-      RAFT_CUDA_TRY(cudaSetDevice(i_gpu));
-      RAFT_CUDA_TRY(cudaMemcpy(
-        d_dest_nodes[i_gpu], dest_nodes, sizeof(uint32_t) * graph_size, cudaMemcpyHostToDevice));
+      for (uint64_t i = 0; i < graph_size; i++) {
+        dest_nodes.data_handle()[i] = pruned_graph.data_handle()[k + (output_graph_degree * i)];
+      }
+      res.sync_stream();
+
+      raft::copy(
+        d_dest_nodes.data_handle(), dest_nodes.data_handle(), graph_size, res.get_stream());
+
       dim3 threads(256, 1, 1);
       dim3 blocks(1024, 1, 1);
-      kern_make_rev_graph<<<blocks, threads>>>(i_gpu,
-                                               d_dest_nodes[i_gpu],
-                                               graph_size,
-                                               d_rev_graph_ptr[num_gpus][i_gpu],
-                                               d_rev_graph_count[num_gpus][i_gpu],
-                                               graph_chunk_size,
-                                               output_graph_degree);
+      kern_make_rev_graph<<<blocks, threads, 0, res.get_stream()>>>(d_dest_nodes.data_handle(),
+                                                                    d_rev_graph.data_handle(),
+                                                                    d_rev_graph_count.data_handle(),
+                                                                    graph_size,
+                                                                    output_graph_degree);
+      RAFT_LOG_DEBUG("# Making reverse graph on GPUs: %lu / %u    \r", k, output_graph_degree);
     }
-    RAFT_LOG_DEBUG("# Making reverse graph on GPUs: %lu / %u    \r", k, output_graph_degree);
-  }
-  RAFT_CUDA_TRY(cudaDeviceSynchronize());
-  RAFT_CUDA_TRY(cudaSetDevice(0));
-  RAFT_LOG_DEBUG("\n");
 
-  mgpu_D2H<uint32_t>(
-    d_rev_graph_ptr, rev_graph_ptr, num_gpus, graph_size, graph_chunk_size, output_graph_degree);
-  mgpu_D2H<uint32_t>(d_rev_graph_count, rev_graph_count, num_gpus, graph_size, graph_chunk_size, 1);
-  mgpu_free<uint32_t>(d_rev_graph_ptr, num_gpus);
-  mgpu_free<uint32_t>(d_rev_graph_count, num_gpus);
+    res.sync_stream();
+    RAFT_LOG_DEBUG("\n");
 
-  double time_make_end = cur_time();
-  RAFT_LOG_DEBUG("# Making reverse graph time: %.1lf sec", time_make_end - time_make_start);
+    raft::copy(rev_graph.data_handle(),
+               d_rev_graph.data_handle(),
+               graph_size * output_graph_degree,
+               res.get_stream());
+    raft::copy(
+      rev_graph_count.data_handle(), d_rev_graph_count.data_handle(), graph_size, res.get_stream());
 
-  //
-  // Replace some edges with reverse edges
-  //
-  double time_replace_start = cur_time();
+    const double time_make_end = cur_time();
+    RAFT_LOG_DEBUG("# Making reverse graph time: %.1lf sec", time_make_end - time_make_start);
+  }
+
+  {
+    //
+    // Replace some edges with reverse edges
+    //
+    const double time_replace_start = cur_time();
 
-  uint64_t num_protected_edges = output_graph_degree / 2;
-  RAFT_LOG_DEBUG("# num_protected_edges: %lu", num_protected_edges);
+    const uint64_t num_protected_edges = output_graph_degree / 2;
+    RAFT_LOG_DEBUG("# num_protected_edges: %lu", num_protected_edges);
 
-  array_size = sizeof(uint32_t) * graph_size * output_graph_degree;
-  memcpy(output_graph_ptr, pruned_graph_ptr, array_size);
+    memcpy(output_graph_ptr,
+           pruned_graph.data_handle(),
+           sizeof(uint32_t) * graph_size * output_graph_degree);
 
-  constexpr int _omp_chunk = 1024;
+    constexpr int _omp_chunk = 1024;
 #pragma omp parallel for schedule(dynamic, _omp_chunk)
-  for (uint64_t j = 0; j < graph_size; j++) {
-    for (uint64_t _k = 0; _k < rev_graph_count[j]; _k++) {
-      uint64_t k = rev_graph_count[j] - 1 - _k;
-      uint64_t i = rev_graph_ptr[k + (output_graph_degree * j)];
-
-      uint64_t pos = pos_in_array<uint32_t>(
-        i, output_graph_ptr + (output_graph_degree * j), output_graph_degree);
-      if (pos < num_protected_edges) { continue; }
-      uint64_t num_shift = pos - num_protected_edges;
-      if (pos == output_graph_degree) { num_shift = output_graph_degree - num_protected_edges - 1; }
-      shift_array<uint32_t>(output_graph_ptr + num_protected_edges + (output_graph_degree * j),
-                            num_shift);
-      output_graph_ptr[num_protected_edges + (output_graph_degree * j)] = i;
-    }
-    if ((omp_get_thread_num() == 0) && ((j % _omp_chunk) == 0)) {
-      RAFT_LOG_DEBUG("# Replacing reverse edges: %lu / %lu    ", j, graph_size);
+    for (uint64_t j = 0; j < graph_size; j++) {
+      for (uint64_t _k = 0; _k < rev_graph_count.data_handle()[j]; _k++) {
+        uint64_t k = rev_graph_count.data_handle()[j] - 1 - _k;
+        uint64_t i = rev_graph.data_handle()[k + (output_graph_degree * j)];
+
+        uint64_t pos = pos_in_array<uint32_t>(
+          i, output_graph_ptr + (output_graph_degree * j), output_graph_degree);
+        if (pos < num_protected_edges) { continue; }
+        uint64_t num_shift = pos - num_protected_edges;
+        if (pos == output_graph_degree) {
+          num_shift = output_graph_degree - num_protected_edges - 1;
+        }
+        shift_array<uint32_t>(output_graph_ptr + num_protected_edges + (output_graph_degree * j),
+                              num_shift);
+        output_graph_ptr[num_protected_edges + (output_graph_degree * j)] = i;
+      }
+      if ((omp_get_thread_num() == 0) && ((j % _omp_chunk) == 0)) {
+        RAFT_LOG_DEBUG("# Replacing reverse edges: %lu / %lu    ", j, graph_size);
+      }
     }
-  }
-  RAFT_LOG_DEBUG("\n");
-  free(rev_graph_ptr);
-  free(rev_graph_count);
+    RAFT_LOG_DEBUG("\n");
 
-  double time_replace_end = cur_time();
-  RAFT_LOG_DEBUG("# Replacing edges time: %.1lf sec", time_replace_end - time_replace_start);
+    const double time_replace_end = cur_time();
+    RAFT_LOG_DEBUG("# Replacing edges time: %.1lf sec", time_replace_end - time_replace_start);
 
-  /* stats */
-  uint64_t num_replaced_edges = 0;
+    /* stats */
+    uint64_t num_replaced_edges = 0;
 #pragma omp parallel for reduction(+ : num_replaced_edges)
-  for (uint64_t i = 0; i < graph_size; i++) {
-    for (uint64_t k = 0; k < output_graph_degree; k++) {
-      uint64_t j   = pruned_graph_ptr[k + (output_graph_degree * i)];
-      uint64_t pos = pos_in_array<uint32_t>(
-        j, output_graph_ptr + (output_graph_degree * i), output_graph_degree);
-      if (pos == output_graph_degree) { num_replaced_edges += 1; }
+    for (uint64_t i = 0; i < graph_size; i++) {
+      for (uint64_t k = 0; k < output_graph_degree; k++) {
+        const uint64_t j   = pruned_graph.data_handle()[k + (output_graph_degree * i)];
+        const uint64_t pos = pos_in_array<uint32_t>(
+          j, output_graph_ptr + (output_graph_degree * i), output_graph_degree);
+        if (pos == output_graph_degree) { num_replaced_edges += 1; }
+      }
     }
+    RAFT_LOG_DEBUG("# Average number of replaced edges per node: %.2f",
+                   (double)num_replaced_edges / graph_size);
   }
-  fprintf(stderr,
-          "# Average number of replaced edges per node: %.2f",
-          (double)num_replaced_edges / graph_size);
 }
 
 }  // namespace graph

From 5524cb9b7e085b8336c82ea9e89fbafd1a7bb002 Mon Sep 17 00:00:00 2001
From: Allard Hendriksen <ahendriksen@nvidia.com>
Date: Tue, 16 May 2023 01:19:33 +0200
Subject: [PATCH 53/78] Fix linalg::map to work with non-power-of-2-sized types
 again (#1453)

Closes issue: #1413.

Adds back the ability for `raft::linalg::map` to work with non-power-of-2-sized types.

Authors:
  - Allard Hendriksen (https://github.com/ahendriksen)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Artem M. Chirkin (https://github.com/achirkin)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1453
---
 cpp/include/raft/linalg/detail/map.cuh |  17 +++-
 cpp/test/linalg/map.cu                 | 121 ++++++++++++++++++++++++-
 2 files changed, 131 insertions(+), 7 deletions(-)

diff --git a/cpp/include/raft/linalg/detail/map.cuh b/cpp/include/raft/linalg/detail/map.cuh
index 90b653b711..c4959e6812 100644
--- a/cpp/include/raft/linalg/detail/map.cuh
+++ b/cpp/include/raft/linalg/detail/map.cuh
@@ -116,15 +116,26 @@ struct ratio_selector {
   template <typename T>
   constexpr static auto ignoring_alignment() -> ratio_selector
   {
-    return ratio_selector{raft::div_rounding_up_safe<size_t>(kCoalescedVectorSize, sizeof(T)), 0};
+    constexpr bool T_evenly_fits_in_cache_line = (kCoalescedVectorSize % sizeof(T)) == 0;
+
+    if constexpr (T_evenly_fits_in_cache_line) {
+      return ratio_selector{size_t(kCoalescedVectorSize / sizeof(T)), 0};
+    } else {
+      return ratio_selector{1, 0};
+    }
   }
 
   template <typename T>
   explicit ratio_selector(const T* ptr)
   {
     constexpr auto s = ignoring_alignment<T>();  // NOLINT
-    align            = int(Pow2<sizeof(T) * s.ratio>::roundUp(ptr) - ptr);
-    ratio            = int(s.ratio);
+
+    if constexpr (s.ratio == 1) {
+      align = 0;
+    } else {
+      align = int(Pow2<sizeof(T) * s.ratio>::roundUp(ptr) - ptr);
+    }
+    ratio = int(s.ratio);
   }
 };
 
diff --git a/cpp/test/linalg/map.cu b/cpp/test/linalg/map.cu
index 15b40808ee..8f2c3ed372 100644
--- a/cpp/test/linalg/map.cu
+++ b/cpp/test/linalg/map.cu
@@ -17,14 +17,69 @@
 #include "../test_utils.cuh"
 #include "unary_op.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/operators.hpp>
 #include <raft/linalg/eltwise.cuh>
 #include <raft/linalg/map.cuh>
+#include <raft/matrix/init.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
 
 namespace raft {
 namespace linalg {
 
+/*
+ * padded_float is a 12-byte type that contains a single float. Two integers are
+ * used for padding. It is used to test types that are not power-of-two-sized.
+ */
+struct padded_float {
+  float value_;
+  int padding1;
+  int padding2;
+
+  padded_float() = default;
+  constexpr padded_float(const float& x) : value_(x), padding1(0), padding2(0) {}
+  constexpr padded_float(const padded_float&)            = default;
+  constexpr padded_float& operator=(const padded_float&) = default;
+  constexpr float abs() const { return std::abs(value_); }
+};
+
+constexpr padded_float operator+(const padded_float& x, const padded_float& y)
+{
+  return padded_float(x.value_ + y.value_);
+}
+
+constexpr padded_float operator-(const padded_float& x, const padded_float& y)
+{
+  return padded_float(x.value_ - y.value_);
+}
+constexpr padded_float operator*(const padded_float& x, const padded_float& y)
+{
+  return padded_float(x.value_ * y.value_);
+}
+constexpr padded_float operator*(const padded_float& x, const int& scalar)
+{
+  return padded_float(scalar * x.value_);
+}
+constexpr bool operator==(const padded_float& x, const padded_float& y)
+{
+  return x.value_ == y.value_;
+}
+
+constexpr bool operator<(const padded_float& x, const padded_float& y)
+{
+  return x.value_ < y.value_;
+}
+constexpr bool operator>(const padded_float& x, const padded_float& y)
+{
+  return x.value_ > y.value_;
+}
+inline auto operator<<(std::ostream& os, const padded_float& x) -> std::ostream&
+{
+  os << x.value_;
+  return os;
+}
+
 template <typename InType, typename IdxType, typename OutType>
 void mapLaunch(OutType* out,
                const InType* in1,
@@ -86,15 +141,38 @@ class MapTest : public ::testing::TestWithParam<MapInputs<InType, IdxType, OutTy
   {
   }
 
- protected:
   void SetUp() override
   {
     raft::random::RngState r(params.seed);
 
     IdxType len = params.len;
-    uniform(handle, r, in1.data(), len, InType(-1.0), InType(1.0));
-    uniform(handle, r, in2.data(), len, InType(-1.0), InType(1.0));
-    uniform(handle, r, in3.data(), len, InType(-1.0), InType(1.0));
+    if constexpr (std::is_floating_point<InType>::value) {
+      uniform(handle, r, in1.data(), len, InType(-1.0), InType(1.0));
+      uniform(handle, r, in2.data(), len, InType(-1.0), InType(1.0));
+      uniform(handle, r, in3.data(), len, InType(-1.0), InType(1.0));
+    } else {
+      // First create random float arrays
+      rmm::device_uvector<float> fin1(params.len, stream);
+      rmm::device_uvector<float> fin2(params.len, stream);
+      rmm::device_uvector<float> fin3(params.len, stream);
+      uniform(handle, r, fin1.data(), len, float(-1.0), float(1.0));
+      uniform(handle, r, fin2.data(), len, float(-1.0), float(1.0));
+      uniform(handle, r, fin3.data(), len, float(-1.0), float(1.0));
+
+      // Then pad them
+      raft::device_resources handle{stream};
+      auto fin1_view = raft::make_device_vector_view(fin1.data(), fin1.size());
+      auto fin2_view = raft::make_device_vector_view(fin2.data(), fin2.size());
+      auto fin3_view = raft::make_device_vector_view(fin3.data(), fin3.size());
+      auto in1_view  = raft::make_device_vector_view(in1.data(), in1.size());
+      auto in2_view  = raft::make_device_vector_view(in2.data(), in2.size());
+      auto in3_view  = raft::make_device_vector_view(in3.data(), in3.size());
+
+      auto add_padding = [] __device__(float a) { return padded_float(a); };
+      raft::linalg::map(handle, in1_view, add_padding, raft::make_const_mdspan(fin1_view));
+      raft::linalg::map(handle, in2_view, add_padding, raft::make_const_mdspan(fin2_view));
+      raft::linalg::map(handle, in3_view, add_padding, raft::make_const_mdspan(fin3_view));
+    }
 
     create_ref(out_ref.data(), in1.data(), in2.data(), in3.data(), params.scalar, len, stream);
     mapLaunch(out.data(), in1.data(), in2.data(), in3.data(), params.scalar, len, stream);
@@ -175,5 +253,40 @@ const std::vector<MapInputs<double, size_t>> inputsd_i64 = {
 MAP_TEST((MapTest<double, size_t>), MapTestD_i64, inputsd_i64);
 MAP_TEST((MapOffsetTest<double, size_t>), MapOffsetTestD_i64, inputsd_i64);
 
+// This comparison structure is necessary because it is not straightforward to
+// add an overload of std::abs for padded_float.
+struct ComparePadded {
+  float eps;
+  ComparePadded(float eps_) : eps(eps_) {}
+  ComparePadded(padded_float eps_) : eps(eps_.value_) {}
+  ComparePadded(double eps_) : eps(eps_) {}
+  bool operator()(const padded_float& a, const padded_float& b) const
+  {
+    float diff  = (a - b).abs();
+    float m     = std::max(a.abs(), b.abs());
+    float ratio = diff > eps ? diff / m : diff;
+    return (ratio <= eps);
+  }
+};
+
+// Use ComparePadded to compare the results
+#define MAP_TEST_PADDED(test_type, test_name, inputs)                \
+  typedef RAFT_DEPAREN(test_type) test_name;                         \
+  TEST_P(test_name, Result)                                          \
+  {                                                                  \
+    ASSERT_TRUE(devArrMatch(this->out_ref.data(),                    \
+                            this->out.data(),                        \
+                            this->params.len,                        \
+                            ComparePadded(this->params.tolerance))); \
+  }                                                                  \
+  INSTANTIATE_TEST_SUITE_P(MapTests, test_name, ::testing::ValuesIn(inputs))
+
+const std::vector<MapInputs<padded_float, size_t>> inputsd_padded_float = {
+  {0.00000001, 1024 * 1024, 1234ULL, 5.2}};
+MAP_TEST_PADDED((MapTest<padded_float, size_t>), MapTestD_padded_float, inputsd_padded_float);
+MAP_TEST_PADDED((MapOffsetTest<padded_float, size_t>),
+                MapOffsetTestD_padded_float,
+                inputsd_padded_float);
+
 }  // namespace linalg
 }  // namespace raft

From e97e7bfb46795c286b478c074362ba22a76ad446 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 16 May 2023 14:23:56 -0700
Subject: [PATCH 54/78] Switch back to using primary shared-action-workflows
 branch (#1519)

This PR unpins the workflows that were using the cuda-120-pip branch, switching them back to branch-23.06.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1519
---
 .github/workflows/build.yaml | 8 ++++----
 .github/workflows/pr.yaml    | 8 ++++----
 .github/workflows/test.yaml  | 4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 80e0c8b216..0f5f84c158 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -66,7 +66,7 @@ jobs:
       run_script: "ci/build_docs.sh"
   wheel-build-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -78,7 +78,7 @@ jobs:
   wheel-publish-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -88,7 +88,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-publish-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -100,7 +100,7 @@ jobs:
   wheel-publish-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index fcb155d651..28efc135b2 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -67,7 +67,7 @@ jobs:
   wheel-build-pylibraft:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -76,7 +76,7 @@ jobs:
   wheel-tests-pylibraft:
     needs: wheel-build-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: pylibraft
@@ -85,7 +85,7 @@ jobs:
   wheel-build-raft-dask:
     needs: wheel-tests-pylibraft
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: raft_dask
@@ -95,7 +95,7 @@ jobs:
   wheel-tests-raft-dask:
     needs: wheel-build-raft-dask
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: pull-request
       package-name: raft_dask
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index d389c4e2a9..ffd7fa3bcb 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -32,7 +32,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-tests-pylibraft:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -42,7 +42,7 @@ jobs:
       test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test"
   wheel-tests-raft-dask:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120-pip
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}

From d891c0064d1d5be070aacd1f0a9746d30879f631 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <github@benfrederickson.com>
Date: Tue, 16 May 2023 14:25:19 -0700
Subject: [PATCH 55/78] Migrate from raft::device_resources -> raft::resources
 (#1510)

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1510
---
 .../ann/src/raft/raft_ivf_flat_wrapper.h      |   3 +-
 cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h  |  18 +-
 cpp/bench/prims/cluster/kmeans_balanced.cu    |   3 +-
 cpp/bench/prims/common/benchmark.hpp          |   5 +-
 cpp/bench/prims/distance/fused_l2_nn.cu       |   3 +-
 cpp/bench/prims/distance/kernels.cu           |   3 +-
 cpp/bench/prims/matrix/argmin.cu              |   3 +-
 cpp/bench/prims/matrix/gather.cu              |   3 +-
 cpp/bench/prims/neighbors/knn.cuh             |  19 +-
 .../raft/cluster/detail/agglomerative.cuh     |  16 +-
 .../raft/cluster/detail/connectivities.cuh    |  26 +-
 cpp/include/raft/cluster/detail/kmeans.cuh    |  78 ++--
 .../cluster/detail/kmeans_auto_find_k.cuh     |   9 +-
 .../raft/cluster/detail/kmeans_balanced.cuh   |  53 +--
 .../raft/cluster/detail/kmeans_common.cuh     |  48 +--
 .../raft/cluster/detail/kmeans_deprecated.cuh |  41 ++-
 cpp/include/raft/cluster/detail/mst.cuh       |   9 +-
 .../raft/cluster/detail/single_linkage.cuh    |   5 +-
 cpp/include/raft/cluster/kmeans.cuh           |  77 ++--
 cpp/include/raft/cluster/kmeans_balanced.cuh  |  15 +-
 .../raft/cluster/kmeans_deprecated.cuh        |   2 +-
 cpp/include/raft/cluster/single_linkage.cuh   |   4 +-
 cpp/include/raft/comms/comms_test.hpp         |  26 +-
 cpp/include/raft/comms/detail/mpi_comms.hpp   |   2 +-
 cpp/include/raft/comms/detail/std_comms.hpp   |   2 +-
 cpp/include/raft/comms/detail/test.hpp        |  88 ++---
 cpp/include/raft/comms/mpi_comms.hpp          |  16 +-
 cpp/include/raft/comms/std_comms.hpp          |  42 +--
 .../core/detail/mdspan_numpy_serializer.hpp   |   3 +-
 cpp/include/raft/core/device_coo_matrix.hpp   |  28 +-
 cpp/include/raft/core/device_csr_matrix.hpp   |  36 +-
 cpp/include/raft/core/device_mdarray.hpp      |   4 +-
 cpp/include/raft/core/device_resources.hpp    |   2 +-
 cpp/include/raft/core/mdarray.hpp             |   2 +-
 .../raft/core/resource/cuda_stream_pool.hpp   |   4 +
 .../raft/core/resource/thrust_policy.hpp      |   3 +-
 cpp/include/raft/core/serialize.hpp           |  33 +-
 cpp/include/raft/core/sparse_types.hpp        |   2 +-
 .../raft/core/temporary_device_buffer.hpp     |  33 +-
 .../raft/distance/detail/compress_to_bits.cuh |   5 +-
 .../distance/detail/kernels/gram_matrix.cuh   |  25 +-
 .../detail/kernels/kernel_matrices.cuh        |  91 +++--
 .../raft/distance/detail/masked_nn.cuh        |   8 +-
 cpp/include/raft/distance/distance-inl.cuh    |   4 +-
 cpp/include/raft/distance/fused_l2_nn-ext.cuh |   2 +-
 cpp/include/raft/distance/fused_l2_nn-inl.cuh |   2 +-
 .../raft/distance/fused_l2_nn_helpers.cuh     |   7 +-
 cpp/include/raft/distance/masked_nn.cuh       |   2 +-
 cpp/include/raft/linalg/add.cuh               |  25 +-
 cpp/include/raft/linalg/axpy.cuh              |  15 +-
 cpp/include/raft/linalg/binary_op.cuh         |   6 +-
 .../raft/linalg/cholesky_r1_update.cuh        |   5 +-
 .../raft/linalg/coalesced_reduction.cuh       |  11 +-
 cpp/include/raft/linalg/detail/axpy.cuh       |   7 +-
 .../raft/linalg/detail/cholesky_r1_update.cuh |  19 +-
 cpp/include/raft/linalg/detail/eig.cuh        |  19 +-
 cpp/include/raft/linalg/detail/gemv.hpp       |  19 +-
 cpp/include/raft/linalg/detail/lanczos.cuh    |  34 +-
 cpp/include/raft/linalg/detail/lstsq.cuh      |  25 +-
 cpp/include/raft/linalg/detail/map.cuh        |  22 +-
 .../raft/linalg/detail/map_then_reduce.cuh    |   2 +-
 .../raft/linalg/detail/matrix_vector_op.cuh   |   8 +-
 cpp/include/raft/linalg/detail/rsvd.cuh       |  10 +-
 cpp/include/raft/linalg/detail/svd.cuh        |  29 +-
 cpp/include/raft/linalg/divide.cuh            |   9 +-
 cpp/include/raft/linalg/dot.cuh               |  20 +-
 cpp/include/raft/linalg/eig.cuh               |  25 +-
 cpp/include/raft/linalg/gemm.cuh              |   7 +-
 cpp/include/raft/linalg/gemv.cuh              |  17 +-
 cpp/include/raft/linalg/lstsq.cuh             |  35 +-
 cpp/include/raft/linalg/map.cuh               |  39 +-
 cpp/include/raft/linalg/map_reduce.cuh        |   7 +-
 cpp/include/raft/linalg/matrix_vector.cuh     |  21 +-
 cpp/include/raft/linalg/matrix_vector_op.cuh  |  15 +-
 .../raft/linalg/mean_squared_error.cuh        |  13 +-
 cpp/include/raft/linalg/multiply.cuh          |   9 +-
 cpp/include/raft/linalg/norm.cuh              |   9 +-
 cpp/include/raft/linalg/normalize.cuh         |  11 +-
 cpp/include/raft/linalg/power.cuh             |  17 +-
 cpp/include/raft/linalg/qr.cuh                |   4 +-
 cpp/include/raft/linalg/reduce.cuh            |   7 +-
 .../raft/linalg/reduce_cols_by_key.cuh        |   9 +-
 .../raft/linalg/reduce_rows_by_key.cuh        |  11 +-
 cpp/include/raft/linalg/rsvd.cuh              |  53 +--
 cpp/include/raft/linalg/sqrt.cuh              |   9 +-
 cpp/include/raft/linalg/strided_reduction.cuh |  11 +-
 cpp/include/raft/linalg/subtract.cuh          |  25 +-
 cpp/include/raft/linalg/svd.cuh               |  41 ++-
 cpp/include/raft/linalg/ternary_op.cuh        |   6 +-
 cpp/include/raft/linalg/unary_op.cuh          |   9 +-
 cpp/include/raft/matrix/argmax.cuh            |  10 +-
 cpp/include/raft/matrix/argmin.cuh            |  10 +-
 cpp/include/raft/matrix/col_wise_sort.cuh     |   7 +-
 cpp/include/raft/matrix/copy.cuh              |  25 +-
 cpp/include/raft/matrix/detail/math.cuh       |   4 +-
 cpp/include/raft/matrix/detail/matrix.cuh     |   7 +-
 cpp/include/raft/matrix/detail/print.hpp      |   2 +-
 .../raft/matrix/detail/select_radix.cuh       |   3 +-
 cpp/include/raft/matrix/gather.cuh            |  11 +-
 cpp/include/raft/matrix/init.cuh              |  12 +-
 cpp/include/raft/matrix/linewise_op.cuh       |  19 +-
 cpp/include/raft/matrix/math.cuh              |   2 +-
 cpp/include/raft/matrix/matrix.cuh            |  11 +-
 cpp/include/raft/matrix/norm.cuh              |   5 +-
 cpp/include/raft/matrix/power.cuh             |  20 +-
 cpp/include/raft/matrix/print.cuh             |   5 +-
 cpp/include/raft/matrix/ratio.cuh             |  16 +-
 cpp/include/raft/matrix/reciprocal.cuh        |   9 +-
 cpp/include/raft/matrix/reverse.cuh           |  17 +-
 cpp/include/raft/matrix/select_k.cuh          |   7 +-
 cpp/include/raft/matrix/sign_flip.cuh         |   6 +-
 cpp/include/raft/matrix/slice.cuh             |   5 +-
 cpp/include/raft/matrix/sqrt.cuh              |  24 +-
 cpp/include/raft/matrix/threshold.cuh         |  10 +-
 cpp/include/raft/neighbors/ball_cover-ext.cuh |  20 +-
 cpp/include/raft/neighbors/ball_cover-inl.cuh |  22 +-
 .../raft/neighbors/ball_cover_types.hpp       |   8 +-
 .../raft/neighbors/brute_force-ext.cuh        |  14 +-
 .../raft/neighbors/brute_force-inl.cuh        |  23 +-
 cpp/include/raft/neighbors/cagra.cuh          |  12 +-
 .../raft/neighbors/cagra_serialize.cuh        |  24 +-
 cpp/include/raft/neighbors/cagra_types.hpp    |  19 +-
 .../neighbors/detail/cagra/cagra_build.cuh    |  29 +-
 .../neighbors/detail/cagra/cagra_search.cuh   |   8 +-
 .../detail/cagra/cagra_serialize.cuh          |   8 +-
 .../raft/neighbors/detail/cagra/factory.cuh   |  13 +-
 .../neighbors/detail/cagra/graph_core.cuh     |  93 +++--
 .../detail/cagra/search_multi_cta.cuh         |  28 +-
 .../detail/cagra/search_multi_kernel.cuh      |  37 +-
 .../neighbors/detail/cagra/search_plan.cuh    |  19 +-
 .../detail/cagra/search_single_cta.cuh        |  16 +-
 .../raft/neighbors/detail/ivf_flat_build.cuh  |  23 +-
 .../neighbors/detail/ivf_flat_search-ext.cuh  |   4 +-
 .../neighbors/detail/ivf_flat_search-inl.cuh  |   9 +-
 .../neighbors/detail/ivf_flat_serialize.cuh   |  18 +-
 .../raft/neighbors/detail/ivf_pq_build.cuh    | 148 ++++----
 .../raft/neighbors/detail/ivf_pq_fp_8bit.cuh  |   2 +-
 .../raft/neighbors/detail/ivf_pq_search.cuh   |  37 +-
 .../neighbors/detail/ivf_pq_serialize.cuh     |  17 +-
 .../raft/neighbors/detail/knn_brute_force.cuh |  26 +-
 cpp/include/raft/neighbors/detail/refine.cuh  |  15 +-
 .../raft/neighbors/epsilon_neighborhood.cuh   |  11 +-
 cpp/include/raft/neighbors/ivf_flat-ext.cuh   |  38 +-
 cpp/include/raft/neighbors/ivf_flat-inl.cuh   |  20 +-
 .../raft/neighbors/ivf_flat_serialize.cuh     |  24 +-
 cpp/include/raft/neighbors/ivf_flat_types.hpp |  13 +-
 cpp/include/raft/neighbors/ivf_list.hpp       |  42 ++-
 cpp/include/raft/neighbors/ivf_list_types.hpp |   4 +-
 cpp/include/raft/neighbors/ivf_pq-ext.cuh     |  34 +-
 cpp/include/raft/neighbors/ivf_pq-inl.cuh     |  21 +-
 cpp/include/raft/neighbors/ivf_pq_helpers.cuh |  40 +-
 .../raft/neighbors/ivf_pq_serialize.cuh       |  26 +-
 cpp/include/raft/neighbors/ivf_pq_types.hpp   |   6 +-
 cpp/include/raft/neighbors/refine-ext.cuh     |  10 +-
 cpp/include/raft/neighbors/refine-inl.cuh     |   6 +-
 .../detail/ball_cover_lowdim.hpp              |   8 +-
 .../raft/solver/detail/lap_functions.cuh      | 203 +++++-----
 .../raft/solver/detail/lap_kernels.cuh        |   2 +-
 cpp/include/raft/solver/linear_assignment.cuh |  50 +--
 cpp/include/raft/sparse/convert/csr.cuh       |   4 +-
 .../raft/sparse/convert/detail/adj_to_csr.cuh |   7 +-
 .../raft/sparse/convert/detail/csr.cuh        |  10 +-
 .../raft/sparse/detail/cusparse_wrappers.h    |   2 +-
 cpp/include/raft/sparse/distance/common.h     |   6 +-
 .../sparse/distance/detail/bin_distance.cuh   |  21 +-
 .../raft/sparse/distance/detail/coo_spmv.cuh  |   7 +-
 .../coo_spmv_strategies/base_strategy.cuh     |  79 ++--
 .../coo_spmv_strategies/hash_strategy.cuh     |  36 +-
 .../sparse/distance/detail/ip_distance.cuh    |   7 +-
 .../sparse/distance/detail/l2_distance.cuh    |  40 +-
 .../sparse/distance/detail/lp_distance.cuh    |  21 +-
 .../raft/sparse/linalg/detail/spectral.cuh    |   9 +-
 .../raft/sparse/linalg/detail/spmm.hpp        |  37 +-
 .../raft/sparse/linalg/detail/symmetrize.cuh  |   5 +-
 cpp/include/raft/sparse/linalg/norm.cuh       |   5 +-
 cpp/include/raft/sparse/linalg/spectral.cuh   |   4 +-
 cpp/include/raft/sparse/linalg/spmm.cuh       |   2 +-
 cpp/include/raft/sparse/linalg/symmetrize.cuh |   2 +-
 cpp/include/raft/sparse/linalg/transpose.cuh  |   7 +-
 .../raft/sparse/neighbors/brute_force.cuh     |   7 +-
 .../sparse/neighbors/connect_components.cuh   |   4 +-
 .../neighbors/detail/connect_components.cuh   |  10 +-
 .../raft/sparse/neighbors/detail/knn.cuh      |  69 ++--
 .../sparse/neighbors/detail/knn_graph.cuh     |   5 +-
 cpp/include/raft/sparse/neighbors/knn.cuh     |   5 +-
 .../raft/sparse/neighbors/knn_graph.cuh       |   2 +-
 cpp/include/raft/sparse/op/detail/reduce.cuh  |  10 +-
 cpp/include/raft/sparse/op/filter.cuh         |   2 +-
 cpp/include/raft/sparse/op/reduce.cuh         |   4 +-
 cpp/include/raft/sparse/op/row_op.cuh         |   2 +-
 cpp/include/raft/sparse/op/slice.cuh          |   2 +-
 cpp/include/raft/sparse/op/sort.cuh           |   2 +-
 .../raft/sparse/solver/detail/lanczos.cuh     |  34 +-
 .../sparse/solver/detail/mst_solver_inl.cuh   |  41 ++-
 cpp/include/raft/sparse/solver/lanczos.cuh    |   4 +-
 cpp/include/raft/sparse/solver/mst.cuh        |   2 +-
 cpp/include/raft/sparse/solver/mst_solver.cuh |   6 +-
 cpp/include/raft/spatial/knn/ann.cuh          |   4 +-
 cpp/include/raft/spatial/knn/ball_cover.cuh   |   6 +-
 .../raft/spatial/knn/detail/ann_quantized.cuh |   9 +-
 .../raft/spatial/knn/detail/ball_cover.cuh    | 113 +++---
 .../knn/detail/ball_cover/registers-ext.cuh   |   8 +-
 .../knn/detail/ball_cover/registers-inl.cuh   | 348 +++++++++---------
 .../spatial/knn/detail/haversine_distance.cuh |   2 +-
 cpp/include/raft/spatial/knn/knn.cuh          |   2 +-
 cpp/include/raft/spectral/cluster_solvers.cuh |   9 +-
 .../spectral/cluster_solvers_deprecated.cuh   |   2 +-
 .../raft/spectral/detail/matrix_wrappers.hpp  |  43 ++-
 .../detail/modularity_maximization.hpp        |  14 +-
 .../raft/spectral/detail/partition.hpp        |  14 +-
 .../raft/spectral/detail/spectral_util.cuh    |  21 +-
 cpp/include/raft/spectral/eigen_solvers.cuh   |   4 +-
 .../raft/spectral/modularity_maximization.cuh |   4 +-
 cpp/include/raft/spectral/partition.cuh       |   4 +-
 cpp/include/raft/stats/accuracy.cuh           |   5 +-
 .../raft/stats/adjusted_rand_index.cuh        |   5 +-
 cpp/include/raft/stats/completeness_score.cuh |   5 +-
 cpp/include/raft/stats/contingency_matrix.cuh |  13 +-
 cpp/include/raft/stats/cov.cuh                |   7 +-
 .../stats/detail/batched/silhouette_score.cuh |  23 +-
 cpp/include/raft/stats/detail/cov.cuh         |   5 +-
 .../raft/stats/detail/silhouette_score.cuh    |   5 +-
 .../stats/detail/trustworthiness_score.cuh    |   7 +-
 cpp/include/raft/stats/dispersion.cuh         |   7 +-
 cpp/include/raft/stats/entropy.cuh            |   5 +-
 cpp/include/raft/stats/histogram.cuh          |   5 +-
 cpp/include/raft/stats/homogeneity_score.cuh  |   5 +-
 .../raft/stats/information_criterion.cuh      |   7 +-
 cpp/include/raft/stats/kl_divergence.cuh      |   9 +-
 cpp/include/raft/stats/mean.cuh               |   7 +-
 cpp/include/raft/stats/mean_center.cuh        |   9 +-
 cpp/include/raft/stats/meanvar.cuh            |   5 +-
 cpp/include/raft/stats/minmax.cuh             |   5 +-
 cpp/include/raft/stats/mutual_info_score.cuh  |   5 +-
 cpp/include/raft/stats/r2_score.cuh           |   5 +-
 cpp/include/raft/stats/rand_index.cuh         |   7 +-
 cpp/include/raft/stats/regression_metrics.cuh |   7 +-
 cpp/include/raft/stats/silhouette_score.cuh   |  15 +-
 cpp/include/raft/stats/stddev.cuh             |  11 +-
 cpp/include/raft/stats/sum.cuh                |   5 +-
 .../raft/stats/trustworthiness_score.cuh      |   6 +-
 cpp/include/raft/stats/v_measure.cuh          |   7 +-
 cpp/include/raft/stats/weighted_mean.cuh      |   9 +-
 cpp/include/raft/util/cache.cuh               |   2 +-
 cpp/include/raft_runtime/cluster/kmeans.hpp   |  18 +-
 .../raft_runtime/distance/fused_l2_nn.hpp     |   6 +-
 .../distance/pairwise_distance.hpp            |   4 +-
 cpp/include/raft_runtime/matrix/select_k.hpp  |   4 +-
 .../raft_runtime/neighbors/brute_force.hpp    |   4 +-
 .../raft_runtime/neighbors/ivf_flat.hpp       |  10 +-
 cpp/include/raft_runtime/neighbors/ivf_pq.hpp |  14 +-
 cpp/include/raft_runtime/neighbors/refine.hpp |   6 +-
 .../random/rmat_rectangular_generator.hpp     |  20 +-
 .../raft_internal/matrix/select_k.cuh         |   7 +-
 .../raft_internal/neighbors/refine_helper.cuh |  15 +-
 cpp/src/neighbors/ball_cover.cu               |  10 +-
 cpp/src/neighbors/brute_force_00_generate.py  |   4 +-
 .../brute_force_fused_l2_knn_float_int64_t.cu |   2 +-
 .../brute_force_knn_int64_t_float_int64_t.cu  |   2 +-
 .../brute_force_knn_int64_t_float_uint32_t.cu |   2 +-
 .../brute_force_knn_int_float_int.cu          |   2 +-
 ...brute_force_knn_uint32_t_float_uint32_t.cu |   2 +-
 cpp/src/neighbors/detail/ivf_flat_search.cu   |   2 +-
 cpp/src/neighbors/ivf_flat_00_generate.py     |  18 +-
 .../neighbors/ivf_flat_build_float_int64_t.cu |   6 +-
 .../ivf_flat_build_int8_t_int64_t.cu          |   6 +-
 .../ivf_flat_build_uint8_t_int64_t.cu         |   6 +-
 .../ivf_flat_extend_float_int64_t.cu          |   8 +-
 .../ivf_flat_extend_int8_t_int64_t.cu         |   8 +-
 .../ivf_flat_extend_uint8_t_int64_t.cu        |   8 +-
 .../ivf_flat_search_float_int64_t.cu          |   4 +-
 .../ivf_flat_search_int8_t_int64_t.cu         |   4 +-
 .../ivf_flat_search_uint8_t_int64_t.cu        |   4 +-
 .../neighbors/ivfpq_build_float_int64_t.cu    |   4 +-
 .../neighbors/ivfpq_build_int8_t_int64_t.cu   |   4 +-
 .../neighbors/ivfpq_build_uint8_t_int64_t.cu  |   4 +-
 .../neighbors/ivfpq_extend_float_int64_t.cu   |   8 +-
 .../neighbors/ivfpq_extend_int8_t_int64_t.cu  |   8 +-
 .../neighbors/ivfpq_extend_uint8_t_int64_t.cu |   8 +-
 .../neighbors/ivfpq_search_float_int64_t.cu   |   4 +-
 .../neighbors/ivfpq_search_int8_t_int64_t.cu  |   4 +-
 .../neighbors/ivfpq_search_uint8_t_int64_t.cu |   4 +-
 cpp/src/neighbors/refine_00_generate.py       |   4 +-
 cpp/src/neighbors/refine_float_float.cu       |   4 +-
 cpp/src/neighbors/refine_int8_t_float.cu      |   4 +-
 cpp/src/neighbors/refine_uint8_t_float.cu     |   4 +-
 cpp/src/raft_runtime/cluster/cluster_cost.cuh |  32 +-
 .../cluster/cluster_cost_double.cu            |   4 +-
 .../cluster/cluster_cost_float.cu             |   4 +-
 .../raft_runtime/cluster/kmeans_fit_double.cu |   4 +-
 .../raft_runtime/cluster/kmeans_fit_float.cu  |   4 +-
 .../cluster/kmeans_init_plus_plus_double.cu   |   7 +-
 .../cluster/kmeans_init_plus_plus_float.cu    |   7 +-
 .../raft_runtime/cluster/update_centroids.cuh |  16 +-
 .../cluster/update_centroids_double.cu        |   4 +-
 .../cluster/update_centroids_float.cu         |   4 +-
 .../raft_runtime/distance/fused_l2_min_arg.cu |  33 +-
 .../distance/pairwise_distance.cu             |   6 +-
 .../matrix/select_k_float_int64_t.cu          |   4 +-
 .../brute_force_knn_int64_t_float.cu          |   4 +-
 .../raft_runtime/neighbors/ivf_flat_build.cu  |   8 +-
 .../raft_runtime/neighbors/ivf_flat_search.cu |   2 +-
 cpp/src/raft_runtime/neighbors/ivfpq_build.cu |   8 +-
 .../neighbors/ivfpq_deserialize.cu            |   2 +-
 .../neighbors/ivfpq_search_float_int64_t.cu   |   2 +-
 .../neighbors/ivfpq_search_int8_t_int64_t.cu  |   2 +-
 .../neighbors/ivfpq_search_uint8_t_int64_t.cu |   2 +-
 .../raft_runtime/neighbors/ivfpq_serialize.cu |   2 +-
 .../neighbors/refine_d_int64_t_float.cu       |   2 +-
 .../neighbors/refine_d_int64_t_int8_t.cu      |   2 +-
 .../neighbors/refine_d_int64_t_uint8_t.cu     |   2 +-
 .../neighbors/refine_h_int64_t_float.cu       |   2 +-
 .../neighbors/refine_h_int64_t_int8_t.cu      |   2 +-
 .../neighbors/refine_h_int64_t_uint8_t.cu     |   2 +-
 cpp/src/raft_runtime/random/common.cuh        |  34 +-
 .../knn/detail/ball_cover/registers.cu        |   4 +-
 .../ball_cover/registers_00_generate.py       |   4 +-
 .../ball_cover/registers_pass_one_2d_dist.cu  |   2 +-
 .../registers_pass_one_2d_euclidean.cu        |   2 +-
 .../registers_pass_one_2d_haversine.cu        |   2 +-
 .../ball_cover/registers_pass_one_3d_dist.cu  |   2 +-
 .../registers_pass_one_3d_euclidean.cu        |   2 +-
 .../registers_pass_one_3d_haversine.cu        |   2 +-
 .../ball_cover/registers_pass_two_2d_dist.cu  |   2 +-
 .../registers_pass_two_2d_euclidean.cu        |   2 +-
 .../registers_pass_two_2d_haversine.cu        |   2 +-
 .../ball_cover/registers_pass_two_3d_dist.cu  |   2 +-
 .../registers_pass_two_3d_euclidean.cu        |   2 +-
 .../registers_pass_two_3d_haversine.cu        |   2 +-
 cpp/test/cluster/cluster_solvers.cu           |  14 +-
 .../cluster/cluster_solvers_deprecated.cu     |   7 +-
 cpp/test/cluster/kmeans.cu                    |  23 +-
 cpp/test/cluster/kmeans_balanced.cu           |   7 +-
 cpp/test/cluster/kmeans_find_k.cu             |  12 +-
 cpp/test/cluster/linkage.cu                   |  11 +-
 cpp/test/core/handle.cpp                      |  36 +-
 cpp/test/core/mdarray.cu                      |  37 +-
 cpp/test/core/mdspan_utils.cu                 |  14 +-
 cpp/test/core/numpy_serializer.cu             |   8 +-
 cpp/test/core/sparse_matrix.cu                |   6 +-
 cpp/test/core/temporary_device_buffer.cu      |  18 +-
 cpp/test/distance/dist_adj.cu                 |   9 +-
 cpp/test/distance/distance_base.cuh           |  22 +-
 cpp/test/distance/fused_l2_nn.cu              |   9 +-
 cpp/test/distance/gram.cu                     |   2 +-
 cpp/test/distance/gram_base.cuh               |   5 +-
 cpp/test/distance/masked_nn.cu                |  13 +-
 .../distance/masked_nn_compress_to_bits.cu    |  21 +-
 cpp/test/label/merge_labels.cu                |   7 +-
 cpp/test/lap/lap.cu                           |  19 +-
 cpp/test/linalg/add.cu                        |   7 +-
 cpp/test/linalg/axpy.cu                       |  15 +-
 cpp/test/linalg/binary_op.cu                  |  26 +-
 cpp/test/linalg/cholesky_r1.cu                |  69 ++--
 cpp/test/linalg/coalesced_reduction.cu        |  15 +-
 cpp/test/linalg/divide.cu                     |   7 +-
 cpp/test/linalg/dot.cu                        |  11 +-
 cpp/test/linalg/eig.cu                        |   7 +-
 cpp/test/linalg/eig_sel.cu                    |   7 +-
 cpp/test/linalg/eigen_solvers.cu              |  19 +-
 cpp/test/linalg/eltwise.cu                    |  13 +-
 cpp/test/linalg/gemm_layout.cu                |   7 +-
 cpp/test/linalg/gemv.cu                       |   7 +-
 cpp/test/linalg/map.cu                        |  12 +-
 cpp/test/linalg/map_then_reduce.cu            |  22 +-
 cpp/test/linalg/matrix_vector.cu              |  17 +-
 cpp/test/linalg/matrix_vector_op.cu           |  11 +-
 cpp/test/linalg/mean_squared_error.cu         |  15 +-
 cpp/test/linalg/multiply.cu                   |   7 +-
 cpp/test/linalg/norm.cu                       |  13 +-
 cpp/test/linalg/normalize.cu                  |   7 +-
 cpp/test/linalg/power.cu                      |  15 +-
 cpp/test/linalg/reduce.cu                     |  10 +-
 cpp/test/linalg/reduce_cols_by_key.cu         |   5 +-
 cpp/test/linalg/reduce_rows_by_key.cu         |   7 +-
 cpp/test/linalg/rsvd.cu                       |  15 +-
 cpp/test/linalg/sqrt.cu                       |   9 +-
 cpp/test/linalg/strided_reduction.cu          |  10 +-
 cpp/test/linalg/subtract.cu                   |   7 +-
 cpp/test/linalg/svd.cu                        |   7 +-
 cpp/test/linalg/ternary_op.cu                 |   5 +-
 cpp/test/linalg/transpose.cu                  |  17 +-
 cpp/test/linalg/unary_op.cu                   |  18 +-
 cpp/test/matrix/argmax.cu                     |  13 +-
 cpp/test/matrix/argmin.cu                     |  13 +-
 cpp/test/matrix/columnSort.cu                 |  35 +-
 cpp/test/matrix/diagonal.cu                   |  13 +-
 cpp/test/matrix/gather.cu                     |   7 +-
 cpp/test/matrix/linewise_op.cu                |   5 +-
 cpp/test/matrix/math.cu                       |   7 +-
 cpp/test/matrix/matrix.cu                     |  20 +-
 cpp/test/matrix/norm.cu                       |   7 +-
 cpp/test/matrix/reverse.cu                    |   7 +-
 cpp/test/matrix/select_k.cu                   |  11 +-
 cpp/test/matrix/slice.cu                      |   7 +-
 cpp/test/matrix/triangular.cu                 |   7 +-
 cpp/test/neighbors/ann_cagra.cuh              |  13 +-
 cpp/test/neighbors/ann_ivf_flat.cuh           |  24 +-
 cpp/test/neighbors/ann_ivf_pq.cuh             |  21 +-
 cpp/test/neighbors/ann_utils.cuh              |   8 +-
 cpp/test/neighbors/ball_cover.cu              | 100 +++--
 cpp/test/neighbors/epsilon_neighborhood.cu    |  13 +-
 cpp/test/neighbors/fused_l2_knn.cu            |   5 +-
 cpp/test/neighbors/haversine.cu               |   7 +-
 cpp/test/neighbors/knn.cu                     |   7 +-
 cpp/test/neighbors/refine.cu                  |   9 +-
 cpp/test/neighbors/selection.cu               |  95 +++--
 cpp/test/neighbors/tiled_knn.cu               |   5 +-
 cpp/test/random/make_blobs.cu                 |   7 +-
 cpp/test/random/make_regression.cu            |  26 +-
 cpp/test/random/multi_variable_gaussian.cu    |  81 ++--
 cpp/test/random/permute.cu                    |  23 +-
 cpp/test/random/rmat_rectangular_generator.cu |  11 +-
 cpp/test/random/rng.cu                        |  39 +-
 cpp/test/random/rng_discrete.cu               |   7 +-
 cpp/test/random/rng_int.cu                    |  19 +-
 cpp/test/random/sample_without_replacement.cu |  15 +-
 cpp/test/sparse/add.cu                        |   7 +-
 cpp/test/sparse/convert_coo.cu                |   7 +-
 cpp/test/sparse/convert_csr.cu                |   5 +-
 cpp/test/sparse/csr_row_slice.cu              |  11 +-
 cpp/test/sparse/csr_to_dense.cu               |   7 +-
 cpp/test/sparse/csr_transpose.cu              |  11 +-
 cpp/test/sparse/dist_coo_spmv.cu              |  27 +-
 cpp/test/sparse/distance.cu                   |  21 +-
 cpp/test/sparse/filter.cu                     |   5 +-
 cpp/test/sparse/gram.cu                       |   9 +-
 cpp/test/sparse/mst.cu                        | 108 ++++--
 cpp/test/sparse/neighbors/brute_force.cu      |  21 +-
 .../sparse/neighbors/connect_components.cu    |  14 +-
 cpp/test/sparse/neighbors/knn_graph.cu        |   7 +-
 cpp/test/sparse/norm.cu                       |   7 +-
 cpp/test/sparse/normalize.cu                  |   7 +-
 cpp/test/sparse/reduce.cu                     |   7 +-
 cpp/test/sparse/row_op.cu                     |   5 +-
 cpp/test/sparse/sort.cu                       |   5 +-
 cpp/test/sparse/spectral_matrix.cu            |  10 +-
 cpp/test/sparse/spgemmi.cu                    |  13 +-
 cpp/test/sparse/symmetrize.cu                 |   7 +-
 cpp/test/stats/accuracy.cu                    |   5 +-
 cpp/test/stats/adjusted_rand_index.cu         |   7 +-
 cpp/test/stats/completeness_score.cu          |   5 +-
 cpp/test/stats/contingencyMatrix.cu           |   5 +-
 cpp/test/stats/cov.cu                         |   5 +-
 cpp/test/stats/dispersion.cu                  |   8 +-
 cpp/test/stats/entropy.cu                     |   5 +-
 cpp/test/stats/histogram.cu                   |  23 +-
 cpp/test/stats/homogeneity_score.cu           |   5 +-
 cpp/test/stats/information_criterion.cu       |   7 +-
 cpp/test/stats/kl_divergence.cu               |   5 +-
 cpp/test/stats/mean.cu                        |   5 +-
 cpp/test/stats/mean_center.cu                 |   7 +-
 cpp/test/stats/meanvar.cu                     |   5 +-
 cpp/test/stats/minmax.cu                      |  13 +-
 cpp/test/stats/mutual_info_score.cu           |   7 +-
 cpp/test/stats/r2_score.cu                    |   5 +-
 cpp/test/stats/rand_index.cu                  |   7 +-
 cpp/test/stats/regression_metrics.cu          |   5 +-
 cpp/test/stats/silhouette_score.cu            |  13 +-
 cpp/test/stats/stddev.cu                      |   7 +-
 cpp/test/stats/sum.cu                         |   9 +-
 cpp/test/stats/trustworthiness.cu             |  10 +-
 cpp/test/stats/v_measure.cu                   |   5 +-
 cpp/test/stats/weighted_mean.cu               |  13 +-
 cpp/test/util/cudart_utils.cpp                |   7 +-
 docs/source/developer_guide.md                |   4 +-
 docs/source/using_comms.rst                   |  14 +-
 docs/source/using_libraft.md                  |   2 +-
 468 files changed, 3820 insertions(+), 3108 deletions(-)

diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
index 0a80eef1b5..36b4931460 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
@@ -22,6 +22,7 @@
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/detail/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/unary_op.cuh>
@@ -137,7 +138,7 @@ void RaftIvfFlatGpu<T, IdxT>::search(
   static_assert(sizeof(size_t) == sizeof(IdxT), "IdxT is incompatible with size_t");
   raft::neighbors::ivf_flat::search(
     handle_, search_params_, *index_, queries, batch_size, k, (IdxT*)neighbors, distances, mr_ptr);
-  handle_.sync_stream();
+  resource::sync_stream(handle_);
   return;
 }
 }  // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
index 517272e6cf..c390d0bd7e 100644
--- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
@@ -21,6 +21,7 @@
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/host_mdspan.hpp>
 #include <raft/core/logger.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>
@@ -176,11 +177,14 @@ void RaftIvfPQ<T, IdxT>::search(const T* queries,
       auto neighbors_host  = raft::make_host_matrix<IdxT, IdxT>(batch_size, k);
       auto distances_host  = raft::make_host_matrix<float, IdxT>(batch_size, k);
 
-      raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream());
+      raft::copy(queries_host.data_handle(),
+                 queries,
+                 queries_host.size(),
+                 resource::get_cuda_stream(handle_));
       raft::copy(candidates_host.data_handle(),
                  candidates.data_handle(),
                  candidates_host.size(),
-                 handle_.get_stream());
+                 resource::get_cuda_stream(handle_));
 
       auto dataset_v = raft::make_host_matrix_view<const T, IdxT>(
         dataset_.data_handle(), batch_size, index_->dim());
@@ -196,9 +200,11 @@ void RaftIvfPQ<T, IdxT>::search(const T* queries,
       raft::copy(neighbors,
                  (size_t*)neighbors_host.data_handle(),
                  neighbors_host.size(),
-                 handle_.get_stream());
-      raft::copy(
-        distances, distances_host.data_handle(), distances_host.size(), handle_.get_stream());
+                 resource::get_cuda_stream(handle_));
+      raft::copy(distances,
+                 distances_host.data_handle(),
+                 distances_host.size(),
+                 resource::get_cuda_stream(handle_));
     }
   } else {
     auto queries_v =
@@ -209,7 +215,7 @@ void RaftIvfPQ<T, IdxT>::search(const T* queries,
     raft::runtime::neighbors::ivf_pq::search(
       handle_, search_params_, *index_, queries_v, neighbors_v, distances_v);
   }
-  handle_.sync_stream();
+  resource::sync_stream(handle_);
   return;
 }
 }  // namespace raft::bench::ann
diff --git a/cpp/bench/prims/cluster/kmeans_balanced.cu b/cpp/bench/prims/cluster/kmeans_balanced.cu
index 42a8f7967c..effe2a55a4 100644
--- a/cpp/bench/prims/cluster/kmeans_balanced.cu
+++ b/cpp/bench/prims/cluster/kmeans_balanced.cu
@@ -16,6 +16,7 @@
 
 #include <common/benchmark.hpp>
 #include <raft/cluster/kmeans_balanced.cuh>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 
 namespace raft::bench::cluster {
@@ -54,7 +55,7 @@ struct KMeansBalanced : public fixture {
       raft::random::uniform(
         rng, X.data_handle(), params.data.rows * params.data.cols, kRangeMin, kRangeMax, stream);
     }
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void allocate_temp_buffers(const ::benchmark::State& state) override
diff --git a/cpp/bench/prims/common/benchmark.hpp b/cpp/bench/prims/common/benchmark.hpp
index 1e783eb338..d3da3bff68 100644
--- a/cpp/bench/prims/common/benchmark.hpp
+++ b/cpp/bench/prims/common/benchmark.hpp
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <memory>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/detail/macros.hpp>
 #include <raft/core/device_mdarray.hpp>
@@ -113,7 +114,7 @@ class fixture {
   raft::device_resources handle;
   rmm::cuda_stream_view stream;
 
-  fixture(bool use_pool_memory_resource = false) : stream{handle.get_stream()}
+  fixture(bool use_pool_memory_resource = false) : stream{resource::get_cuda_stream(handle)}
   {
     // Cache memory pool between test runs, since it is expensive to create.
     // This speeds up the time required to run the select_k bench by over 3x.
@@ -209,7 +210,7 @@ class BlobsFixture : public fixture {
                                         (T)blobs_params.center_box_min,
                                         (T)blobs_params.center_box_max,
                                         blobs_params.seed);
-    this->handle.sync_stream(stream);
+    resource::sync_stream(this->handle, stream);
   }
 
  protected:
diff --git a/cpp/bench/prims/distance/fused_l2_nn.cu b/cpp/bench/prims/distance/fused_l2_nn.cu
index 24c0cbf8f9..c0ebd60458 100644
--- a/cpp/bench/prims/distance/fused_l2_nn.cu
+++ b/cpp/bench/prims/distance/fused_l2_nn.cu
@@ -15,6 +15,7 @@
  */
 
 #include <common/benchmark.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
 #include <raft/linalg/norm.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -74,7 +75,7 @@ struct fusedl2nn : public fixture {
                           raft::linalg::L2Norm,
                           true,
                           stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void allocate_temp_buffers(const ::benchmark::State& state) override
diff --git a/cpp/bench/prims/distance/kernels.cu b/cpp/bench/prims/distance/kernels.cu
index 53d97c1fc7..7d916e6ce0 100644
--- a/cpp/bench/prims/distance/kernels.cu
+++ b/cpp/bench/prims/distance/kernels.cu
@@ -16,6 +16,7 @@
 #include <common/benchmark.hpp>
 #include <memory>
 #include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/kernels.cuh>
 #include <raft/random/rng.cuh>
@@ -40,7 +41,7 @@ struct GramMatrix : public fixture {
     : params(p), handle(stream), A(0, stream), B(0, stream), C(0, stream)
   {
     kernel = std::unique_ptr<GramMatrixBase<T>>(
-      KernelFactory<T>::create(p.kernel_params, handle.get_cublas_handle()));
+      KernelFactory<T>::create(p.kernel_params, resource::get_cublas_handle(handle)));
 
     A.resize(params.m * params.k, stream);
     B.resize(params.k * params.n, stream);
diff --git a/cpp/bench/prims/matrix/argmin.cu b/cpp/bench/prims/matrix/argmin.cu
index 929eed48c4..a8f667257a 100644
--- a/cpp/bench/prims/matrix/argmin.cu
+++ b/cpp/bench/prims/matrix/argmin.cu
@@ -15,6 +15,7 @@
  */
 
 #include <common/benchmark.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/argmin.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/itertools.hpp>
@@ -40,7 +41,7 @@ struct Argmin : public fixture {
     raft::random::RngState rng{1234};
     raft::random::uniform(
       rng, matrix.data_handle(), params.rows * params.cols, T(-1), T(1), stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void run_benchmark(::benchmark::State& state) override
diff --git a/cpp/bench/prims/matrix/gather.cu b/cpp/bench/prims/matrix/gather.cu
index 213e2aa55f..ca6a2830bd 100644
--- a/cpp/bench/prims/matrix/gather.cu
+++ b/cpp/bench/prims/matrix/gather.cu
@@ -15,6 +15,7 @@
  */
 
 #include <common/benchmark.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/gather.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/itertools.hpp>
@@ -57,7 +58,7 @@ struct Gather : public fixture {
     if constexpr (Conditional) {
       raft::random::uniform(rng, stencil.data_handle(), params.map_length, T(-1), T(1), stream);
     }
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void run_benchmark(::benchmark::State& state) override
diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
index 8239fa4f89..8cdb816dab 100644
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ b/cpp/bench/prims/neighbors/knn.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <common/benchmark.hpp>
+#include <raft/core/resource/device_id.hpp>
 
 #include <raft/random/rng.cuh>
 
@@ -311,12 +312,18 @@ struct knn : public fixture {
             RAFT_CUDA_TRY(cudaHostGetDevicePointer(&data_ptr, data_host_.data(), 0));
             break;
           case TransferStrategy::MANAGED:  // sic! using std::memcpy rather than cuda copy
-            RAFT_CUDA_TRY(cudaMemAdvise(
-              data_ptr, allocation_size, cudaMemAdviseSetPreferredLocation, handle.get_device()));
-            RAFT_CUDA_TRY(cudaMemAdvise(
-              data_ptr, allocation_size, cudaMemAdviseSetAccessedBy, handle.get_device()));
-            RAFT_CUDA_TRY(cudaMemAdvise(
-              data_ptr, allocation_size, cudaMemAdviseSetReadMostly, handle.get_device()));
+            RAFT_CUDA_TRY(cudaMemAdvise(data_ptr,
+                                        allocation_size,
+                                        cudaMemAdviseSetPreferredLocation,
+                                        resource::get_device_id(handle)));
+            RAFT_CUDA_TRY(cudaMemAdvise(data_ptr,
+                                        allocation_size,
+                                        cudaMemAdviseSetAccessedBy,
+                                        resource::get_device_id(handle)));
+            RAFT_CUDA_TRY(cudaMemAdvise(data_ptr,
+                                        allocation_size,
+                                        cudaMemAdviseSetReadMostly,
+                                        resource::get_device_id(handle)));
             std::memcpy(data_ptr, data_host_.data(), allocation_size);
             break;
           default: break;
diff --git a/cpp/include/raft/cluster/detail/agglomerative.cuh b/cpp/include/raft/cluster/detail/agglomerative.cuh
index f4b2ecf051..624e67b7fa 100644
--- a/cpp/include/raft/cluster/detail/agglomerative.cuh
+++ b/cpp/include/raft/cluster/detail/agglomerative.cuh
@@ -16,7 +16,9 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -100,7 +102,7 @@ class UnionFind {
  * @param[out] out_size cluster sizes of output
  */
 template <typename value_idx, typename value_t>
-void build_dendrogram_host(raft::device_resources const& handle,
+void build_dendrogram_host(raft::resources const& handle,
                            const value_idx* rows,
                            const value_idx* cols,
                            const value_t* data,
@@ -109,7 +111,7 @@ void build_dendrogram_host(raft::device_resources const& handle,
                            value_t* out_delta,
                            value_idx* out_size)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   value_idx n_edges = nnz;
 
@@ -121,7 +123,7 @@ void build_dendrogram_host(raft::device_resources const& handle,
   update_host(mst_dst_h.data(), cols, n_edges, stream);
   update_host(mst_weights_h.data(), data, n_edges, stream);
 
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   std::vector<value_idx> children_h(n_edges * 2);
   std::vector<value_idx> out_size_h(n_edges);
@@ -236,14 +238,14 @@ struct init_label_roots {
  * @param n_leaves
  */
 template <typename value_idx, int tpb = 256>
-void extract_flattened_clusters(raft::device_resources const& handle,
+void extract_flattened_clusters(raft::resources const& handle,
                                 value_idx* labels,
                                 const value_idx* children,
                                 size_t n_clusters,
                                 size_t n_leaves)
 {
-  auto stream        = handle.get_stream();
-  auto thrust_policy = handle.get_thrust_policy();
+  auto stream        = resource::get_cuda_stream(handle);
+  auto thrust_policy = resource::get_thrust_policy(handle);
 
   // Handle special case where n_clusters == 1
   if (n_clusters == 1) {
diff --git a/cpp/include/raft/cluster/detail/connectivities.cuh b/cpp/include/raft/cluster/detail/connectivities.cuh
index 163670f29a..ef046ab4ff 100644
--- a/cpp/include/raft/cluster/detail/connectivities.cuh
+++ b/cpp/include/raft/cluster/detail/connectivities.cuh
@@ -16,7 +16,9 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -40,7 +42,7 @@ namespace raft::cluster::detail {
 
 template <raft::cluster::LinkageDistance dist_type, typename value_idx, typename value_t>
 struct distance_graph_impl {
-  void run(raft::device_resources const& handle,
+  void run(raft::resources const& handle,
            const value_t* X,
            size_t m,
            size_t n,
@@ -58,7 +60,7 @@ struct distance_graph_impl {
  */
 template <typename value_idx, typename value_t>
 struct distance_graph_impl<raft::cluster::LinkageDistance::KNN_GRAPH, value_idx, value_t> {
-  void run(raft::device_resources const& handle,
+  void run(raft::resources const& handle,
            const value_t* X,
            size_t m,
            size_t n,
@@ -68,8 +70,8 @@ struct distance_graph_impl<raft::cluster::LinkageDistance::KNN_GRAPH, value_idx,
            rmm::device_uvector<value_t>& data,
            int c)
   {
-    auto stream        = handle.get_stream();
-    auto thrust_policy = handle.get_thrust_policy();
+    auto stream        = resource::get_cuda_stream(handle);
+    auto thrust_policy = resource::get_thrust_policy(handle);
 
     // Need to symmetrize knn into undirected graph
     raft::sparse::COO<value_t, value_idx> knn_graph_coo(stream);
@@ -127,7 +129,7 @@ __global__ void fill_indices2(value_idx* indices, size_t m, size_t nnz)
  * @param[out] data
  */
 template <typename value_idx, typename value_t>
-void pairwise_distances(const raft::device_resources& handle,
+void pairwise_distances(const raft::resources& handle,
                         const value_t* X,
                         size_t m,
                         size_t n,
@@ -136,8 +138,8 @@ void pairwise_distances(const raft::device_resources& handle,
                         value_idx* indices,
                         value_t* data)
 {
-  auto stream      = handle.get_stream();
-  auto exec_policy = handle.get_thrust_policy();
+  auto stream      = resource::get_cuda_stream(handle);
+  auto exec_policy = resource::get_thrust_policy(handle);
 
   value_idx nnz = m * m;
 
@@ -175,7 +177,7 @@ void pairwise_distances(const raft::device_resources& handle,
  */
 template <typename value_idx, typename value_t>
 struct distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, value_idx, value_t> {
-  void run(const raft::device_resources& handle,
+  void run(const raft::resources& handle,
            const value_t* X,
            size_t m,
            size_t n,
@@ -185,7 +187,7 @@ struct distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, value_idx,
            rmm::device_uvector<value_t>& data,
            int c)
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
 
     size_t nnz = m * m;
 
@@ -213,7 +215,7 @@ struct distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, value_idx,
  *             which will guarantee k <= log(n) + c
  */
 template <typename value_idx, typename value_t, raft::cluster::LinkageDistance dist_type>
-void get_distance_graph(raft::device_resources const& handle,
+void get_distance_graph(raft::resources const& handle,
                         const value_t* X,
                         size_t m,
                         size_t n,
@@ -223,7 +225,7 @@ void get_distance_graph(raft::device_resources const& handle,
                         rmm::device_uvector<value_t>& data,
                         int c)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   indptr.resize(m + 1, stream);
 
diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh
index e93368fa3c..e647e33734 100644
--- a/cpp/include/raft/cluster/detail/kmeans.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans.cuh
@@ -20,6 +20,8 @@
 #include <cstdio>
 #include <ctime>
 #include <optional>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <random>
 
 #include <cuda.h>
@@ -31,12 +33,12 @@
 #include <raft/common/nvtx.hpp>
 #include <raft/core/cudart_utils.hpp>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/kvp.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/mdarray.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/map_then_reduce.cuh>
 #include <raft/linalg/matrix_vector_op.cuh>
@@ -59,13 +61,13 @@ namespace detail {
 
 // Selects 'n_clusters' samples randomly from X
 template <typename DataT, typename IndexT>
-void initRandom(raft::device_resources const& handle,
+void initRandom(raft::resources const& handle,
                 const KMeansParams& params,
                 raft::device_matrix_view<const DataT, IndexT> X,
                 raft::device_matrix_view<DataT, IndexT> centroids)
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope("initRandom");
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_clusters     = params.n_clusters;
   detail::shuffleAndGather<DataT, IndexT>(handle, X, centroids, n_clusters, params.rng_state.seed);
 }
@@ -85,14 +87,14 @@ void initRandom(raft::device_resources const& handle,
  * 5: end for
  */
 template <typename DataT, typename IndexT>
-void kmeansPlusPlus(raft::device_resources const& handle,
+void kmeansPlusPlus(raft::resources const& handle,
                     const KMeansParams& params,
                     raft::device_matrix_view<const DataT, IndexT> X,
                     raft::device_matrix_view<DataT, IndexT> centroidsRawData,
                     rmm::device_uvector<char>& workspace)
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope("kmeansPlusPlus");
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = params.n_clusters;
@@ -244,7 +246,7 @@ void kmeansPlusPlus(raft::device_resources const& handle,
 
       int bestCandidateIdx = -1;
       raft::copy(&bestCandidateIdx, &minClusterIndexAndDistance.data()->key, 1, stream);
-      handle.sync_stream();
+      resource::sync_stream(handle);
       /// <<< End of Step-3 >>>
 
       /// <<< Step-4 >>>: C = C U {x}
@@ -282,7 +284,7 @@ void kmeansPlusPlus(raft::device_resources const& handle,
  * @param[inout] workspace
  */
 template <typename DataT, typename IndexT, typename LabelsIterator>
-void update_centroids(raft::device_resources const& handle,
+void update_centroids(raft::resources const& handle,
                       raft::device_matrix_view<const DataT, IndexT, row_major> X,
                       raft::device_vector_view<const DataT, IndexT> sample_weights,
                       raft::device_matrix_view<const DataT, IndexT, row_major> centroids,
@@ -296,7 +298,7 @@ void update_centroids(raft::device_resources const& handle,
   auto n_clusters = centroids.extent(0);
   auto n_samples  = X.extent(0);
 
-  workspace.resize(n_samples, handle.get_stream());
+  workspace.resize(n_samples, resource::get_cuda_stream(handle));
 
   // Calculates weighted sum of all the samples assigned to cluster-i and stores the
   // result in new_centroids[i]
@@ -309,7 +311,7 @@ void update_centroids(raft::device_resources const& handle,
                                    X.extent(1),
                                    n_clusters,
                                    new_centroids.data_handle(),
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 
   // Reduce weights by key to compute weight in each cluster
   raft::linalg::reduce_cols_by_key(sample_weights.data_handle(),
@@ -318,7 +320,7 @@ void update_centroids(raft::device_resources const& handle,
                                    (IndexT)1,
                                    (IndexT)sample_weights.extent(0),
                                    (IndexT)n_clusters,
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 
   // Computes new_centroids[i] = new_centroids[i]/weight_per_cluster[i] where
   //   new_centroids[n_clusters x n_features] - 2D array, new_centroids[i] has sum of all the
@@ -334,7 +336,7 @@ void update_centroids(raft::device_resources const& handle,
                                true,
                                false,
                                raft::div_checkzero_op{},
-                               handle.get_stream());
+                               resource::get_cuda_stream(handle));
 
   // copy centroids[i] to new_centroids[i] when weight_per_cluster[i] is 0
   cub::ArgIndexInputIterator<DataT*> itr_wt(weight_per_cluster.data_handle());
@@ -351,12 +353,12 @@ void update_centroids(raft::device_resources const& handle,
       return map.value == 0;
     },
     raft::key_op{},
-    handle.get_stream());
+    resource::get_cuda_stream(handle));
 }
 
 // TODO: Resizing is needed to use mdarray instead of rmm::device_uvector
 template <typename DataT, typename IndexT>
-void kmeans_fit_main(raft::device_resources const& handle,
+void kmeans_fit_main(raft::resources const& handle,
                      const KMeansParams& params,
                      raft::device_matrix_view<const DataT, IndexT> X,
                      raft::device_vector_view<const DataT, IndexT> weight,
@@ -367,7 +369,7 @@ void kmeans_fit_main(raft::device_resources const& handle,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope("kmeans_fit_main");
   logger::get(RAFT_NAME).set_level(params.verbosity);
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = params.n_clusters;
@@ -498,7 +500,7 @@ void kmeans_fit_main(raft::device_resources const& handle,
       priorClusteringCost = curClusteringCost;
     }
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
     if (sqrdNormError < params.tol) done = true;
 
     if (done) {
@@ -522,7 +524,7 @@ void kmeans_fit_main(raft::device_resources const& handle,
                                                       workspace);
 
   // TODO: add different templates for InType of binaryOp to avoid thrust transform
-  thrust::transform(handle.get_thrust_policy(),
+  thrust::transform(resource::get_thrust_policy(handle),
                     minClusterAndDistance.data_handle(),
                     minClusterAndDistance.data_handle() + minClusterAndDistance.size(),
                     weight.data_handle(),
@@ -573,14 +575,14 @@ void kmeans_fit_main(raft::device_resources const& handle,
 
  */
 template <typename DataT, typename IndexT>
-void initScalableKMeansPlusPlus(raft::device_resources const& handle,
+void initScalableKMeansPlusPlus(raft::resources const& handle,
                                 const KMeansParams& params,
                                 raft::device_matrix_view<const DataT, IndexT> X,
                                 raft::device_matrix_view<DataT, IndexT> centroidsRawData,
                                 rmm::device_uvector<char>& workspace)
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope("initScalableKMeansPlusPlus");
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = params.n_clusters;
@@ -662,7 +664,7 @@ void initScalableKMeansPlusPlus(raft::device_resources const& handle,
   // <<< End of Step-2 >>>
 
   // Scalable kmeans++ paper claims 8 rounds is sufficient
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   int niter = std::min(8, (int)ceil(log(psi)));
   RAFT_LOG_DEBUG("KMeans||: psi = %g, log(psi) = %g, niter = %d ", psi, log(psi), niter);
 
@@ -816,7 +818,7 @@ void initScalableKMeansPlusPlus(raft::device_resources const& handle,
  * @param[out]    n_iter        Number of iterations run.
  */
 template <typename DataT, typename IndexT>
-void kmeans_fit(raft::device_resources const& handle,
+void kmeans_fit(raft::resources const& handle,
                 const KMeansParams& params,
                 raft::device_matrix_view<const DataT, IndexT> X,
                 std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -828,7 +830,7 @@ void kmeans_fit(raft::device_resources const& handle,
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = params.n_clusters;
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   // Check that parameters are valid
   if (sample_weight.has_value())
     RAFT_EXPECTS(sample_weight.value().extent(0) == n_samples,
@@ -870,8 +872,10 @@ void kmeans_fit(raft::device_resources const& handle,
   if (sample_weight.has_value())
     raft::copy(weight.data_handle(), sample_weight.value().data_handle(), n_samples, stream);
   else
-    thrust::fill(
-      handle.get_thrust_policy(), weight.data_handle(), weight.data_handle() + weight.size(), 1);
+    thrust::fill(resource::get_thrust_policy(handle),
+                 weight.data_handle(),
+                 weight.data_handle() + weight.size(),
+                 1);
 
   // check if weights sum up to n_samples
   checkWeight<DataT>(handle, weight.view(), workspace);
@@ -955,7 +959,7 @@ void kmeans_fit(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_fit(raft::device_resources const& handle,
+void kmeans_fit(raft::resources const& handle,
                 const KMeansParams& params,
                 const DataT* X,
                 const DataT* sample_weight,
@@ -980,7 +984,7 @@ void kmeans_fit(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT>
-void kmeans_predict(raft::device_resources const& handle,
+void kmeans_predict(raft::resources const& handle,
                     const KMeansParams& params,
                     raft::device_matrix_view<const DataT, IndexT> X,
                     std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -992,7 +996,7 @@ void kmeans_predict(raft::device_resources const& handle,
   common::nvtx::range<common::nvtx::domain::raft> fun_scope("kmeans_predict");
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   // Check that parameters are valid
   if (sample_weight.has_value())
     RAFT_EXPECTS(sample_weight.value().extent(0) == n_samples,
@@ -1015,8 +1019,10 @@ void kmeans_predict(raft::device_resources const& handle,
   if (sample_weight.has_value())
     raft::copy(weight.data_handle(), sample_weight.value().data_handle(), n_samples, stream);
   else
-    thrust::fill(
-      handle.get_thrust_policy(), weight.data_handle(), weight.data_handle() + weight.size(), 1);
+    thrust::fill(resource::get_thrust_policy(handle),
+                 weight.data_handle(),
+                 weight.data_handle() + weight.size(),
+                 1);
 
   // check if weights sum up to n_samples
   if (normalize_weight) checkWeight(handle, weight.view(), workspace);
@@ -1059,7 +1065,7 @@ void kmeans_predict(raft::device_resources const& handle,
   // calculate cluster cost phi_x(C)
   rmm::device_scalar<DataT> clusterCostD(stream);
   // TODO: add different templates for InType of binaryOp to avoid thrust transform
-  thrust::transform(handle.get_thrust_policy(),
+  thrust::transform(resource::get_thrust_policy(handle),
                     minClusterAndDistance.data_handle(),
                     minClusterAndDistance.data_handle() + minClusterAndDistance.size(),
                     weight.data_handle(),
@@ -1078,7 +1084,7 @@ void kmeans_predict(raft::device_resources const& handle,
                              raft::value_op{},
                              raft::add_op{});
 
-  thrust::transform(handle.get_thrust_policy(),
+  thrust::transform(resource::get_thrust_policy(handle),
                     minClusterAndDistance.data_handle(),
                     minClusterAndDistance.data_handle() + minClusterAndDistance.size(),
                     labels.data_handle(),
@@ -1088,7 +1094,7 @@ void kmeans_predict(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_predict(raft::device_resources const& handle,
+void kmeans_predict(raft::resources const& handle,
                     const KMeansParams& params,
                     const DataT* X,
                     const DataT* sample_weight,
@@ -1120,7 +1126,7 @@ void kmeans_predict(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_fit_predict(raft::device_resources const& handle,
+void kmeans_fit_predict(raft::resources const& handle,
                         const KMeansParams& params,
                         raft::device_matrix_view<const DataT, IndexT> X,
                         std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -1147,7 +1153,7 @@ void kmeans_fit_predict(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_fit_predict(raft::device_resources const& handle,
+void kmeans_fit_predict(raft::resources const& handle,
                         const KMeansParams& params,
                         const DataT* X,
                         const DataT* sample_weight,
@@ -1187,7 +1193,7 @@ void kmeans_fit_predict(raft::device_resources const& handle,
  * @param[out]    X_new         X transformed in the new space..
  */
 template <typename DataT, typename IndexT = int>
-void kmeans_transform(raft::device_resources const& handle,
+void kmeans_transform(raft::resources const& handle,
                       const KMeansParams& params,
                       raft::device_matrix_view<const DataT> X,
                       raft::device_matrix_view<const DataT> centroids,
@@ -1195,7 +1201,7 @@ void kmeans_transform(raft::device_resources const& handle,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope("kmeans_transform");
   logger::get(RAFT_NAME).set_level(params.verbosity);
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = params.n_clusters;
@@ -1228,7 +1234,7 @@ void kmeans_transform(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_transform(raft::device_resources const& handle,
+void kmeans_transform(raft::resources const& handle,
                       const KMeansParams& params,
                       const DataT* X,
                       const DataT* centroids,
diff --git a/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh b/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh
index edc74a085f..f6bdb191cd 100644
--- a/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_auto_find_k.cuh
@@ -17,6 +17,7 @@
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdarray.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <thrust/host_vector.h>
 
 #include <raft/core/logger.hpp>
@@ -25,13 +26,13 @@
 
 #include <raft/core/error.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/dispersion.cuh>
 
 namespace raft::cluster::detail {
 
 template <typename value_t, typename idx_t>
-void compute_dispersion(raft::device_resources const& handle,
+void compute_dispersion(raft::resources const& handle,
                         raft::device_matrix_view<const value_t, idx_t> X,
                         KMeansParams& params,
                         raft::device_matrix_view<value_t, idx_t> centroids_view,
@@ -66,7 +67,7 @@ void compute_dispersion(raft::device_resources const& handle,
 }
 
 template <typename idx_t, typename value_t>
-void find_k(raft::device_resources const& handle,
+void find_k(raft::resources const& handle,
             raft::device_matrix_view<const value_t, idx_t> X,
             raft::host_scalar_view<idx_t> best_k,
             raft::host_scalar_view<value_t> residual,
@@ -92,7 +93,7 @@ void find_k(raft::device_resources const& handle,
   auto clusterSizes = raft::make_device_vector<idx_t>(handle, kmax);
   auto labels       = raft::make_device_vector<idx_t>(handle, n);
 
-  rmm::device_uvector<char> workspace(0, handle.get_stream());
+  rmm::device_uvector<char> workspace(0, resource::get_cuda_stream(handle));
 
   idx_t* clusterSizes_ptr = clusterSizes.data_handle();
 
diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
index 9e5f7a7c9a..866a0ebdfa 100644
--- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh
@@ -17,6 +17,9 @@
 #pragma once
 
 #include <limits>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <type_traits>
 
 #include <raft/cluster/detail/kmeans_common.cuh>
@@ -80,7 +83,7 @@ constexpr static inline float kAdjustCentersWeight = 7.0f;
  */
 template <typename MathT, typename IdxT, typename LabelT>
 inline std::enable_if_t<std::is_floating_point_v<MathT>> predict_core(
-  const raft::device_resources& handle,
+  const raft::resources& handle,
   const kmeans_balanced_params& params,
   const MathT* centers,
   IdxT n_clusters,
@@ -91,7 +94,7 @@ inline std::enable_if_t<std::is_floating_point_v<MathT>> predict_core(
   LabelT* labels,
   rmm::mr::device_memory_resource* mr)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   switch (params.metric) {
     case raft::distance::DistanceType::L2Expanded:
     case raft::distance::DistanceType::L2SqrtExpanded: {
@@ -101,7 +104,7 @@ inline std::enable_if_t<std::is_floating_point_v<MathT>> predict_core(
       auto minClusterAndDistance = raft::make_device_mdarray<raft::KeyValuePair<IdxT, MathT>, IdxT>(
         handle, mr, make_extents<IdxT>(n_rows));
       raft::KeyValuePair<IdxT, MathT> initial_value(0, std::numeric_limits<MathT>::max());
-      thrust::fill(handle.get_thrust_policy(),
+      thrust::fill(resource::get_thrust_policy(handle),
                    minClusterAndDistance.data_handle(),
                    minClusterAndDistance.data_handle() + minClusterAndDistance.size(),
                    initial_value);
@@ -127,7 +130,7 @@ inline std::enable_if_t<std::is_floating_point_v<MathT>> predict_core(
 
       // todo(lsugy): use KVP + iterator in caller.
       // Copy keys to output labels
-      thrust::transform(handle.get_thrust_policy(),
+      thrust::transform(resource::get_thrust_policy(handle),
                         minClusterAndDistance.data_handle(),
                         minClusterAndDistance.data_handle() + n_rows,
                         labels,
@@ -251,7 +254,7 @@ template <typename T,
           typename LabelT,
           typename CounterT,
           typename MappingOpT>
-void calc_centers_and_sizes(const raft::device_resources& handle,
+void calc_centers_and_sizes(const raft::resources& handle,
                             MathT* centers,
                             CounterT* cluster_sizes,
                             IdxT n_clusters,
@@ -263,8 +266,8 @@ void calc_centers_and_sizes(const raft::device_resources& handle,
                             MappingOpT mapping_op,
                             rmm::mr::device_memory_resource* mr = nullptr)
 {
-  auto stream = handle.get_stream();
-  if (mr == nullptr) { mr = handle.get_workspace_resource(); }
+  auto stream = resource::get_cuda_stream(handle);
+  if (mr == nullptr) { mr = resource::get_workspace_resource(handle); }
 
   if (!reset_counters) {
     raft::linalg::matrixVectorOp(
@@ -314,7 +317,7 @@ void calc_centers_and_sizes(const raft::device_resources& handle,
 
 /** Computes the L2 norm of the dataset, converting to MathT if necessary */
 template <typename T, typename MathT, typename IdxT, typename MappingOpT>
-void compute_norm(const raft::device_resources& handle,
+void compute_norm(const raft::resources& handle,
                   MathT* dataset_norm,
                   const T* dataset,
                   IdxT dim,
@@ -323,8 +326,8 @@ void compute_norm(const raft::device_resources& handle,
                   rmm::mr::device_memory_resource* mr = nullptr)
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope("compute_norm");
-  auto stream = handle.get_stream();
-  if (mr == nullptr) { mr = handle.get_workspace_resource(); }
+  auto stream = resource::get_cuda_stream(handle);
+  if (mr == nullptr) { mr = resource::get_workspace_resource(handle); }
   rmm::device_uvector<MathT> mapped_dataset(0, stream, mr);
 
   const MathT* dataset_ptr = nullptr;
@@ -365,7 +368,7 @@ void compute_norm(const raft::device_resources& handle,
  * @param[in] dataset_norm (optional) Pre-computed norms of each row in the dataset [n_rows]
  */
 template <typename T, typename MathT, typename IdxT, typename LabelT, typename MappingOpT>
-void predict(const raft::device_resources& handle,
+void predict(const raft::resources& handle,
              const kmeans_balanced_params& params,
              const MathT* centers,
              IdxT n_clusters,
@@ -377,10 +380,10 @@ void predict(const raft::device_resources& handle,
              rmm::mr::device_memory_resource* mr = nullptr,
              const MathT* dataset_norm           = nullptr)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "predict(%zu, %u)", static_cast<size_t>(n_rows), n_clusters);
-  if (mr == nullptr) { mr = handle.get_workspace_resource(); }
+  if (mr == nullptr) { mr = resource::get_workspace_resource(handle); }
   auto [max_minibatch_size, _mem_per_row] =
     calc_minibatch_size<MathT>(n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
   rmm::device_uvector<MathT> cur_dataset(
@@ -612,7 +615,7 @@ template <typename T,
           typename LabelT,
           typename CounterT,
           typename MappingOpT>
-void balancing_em_iters(const raft::device_resources& handle,
+void balancing_em_iters(const raft::resources& handle,
                         const kmeans_balanced_params& params,
                         uint32_t n_iters,
                         IdxT dim,
@@ -628,7 +631,7 @@ void balancing_em_iters(const raft::device_resources& handle,
                         MappingOpT mapping_op,
                         rmm::mr::device_memory_resource* device_memory)
 {
-  auto stream                = handle.get_stream();
+  auto stream                = resource::get_cuda_stream(handle);
   uint32_t balancing_counter = balancing_pullback;
   for (uint32_t iter = 0; iter < n_iters; iter++) {
     // Balancing step - move the centers around to equalize cluster sizes
@@ -699,7 +702,7 @@ template <typename T,
           typename LabelT,
           typename CounterT,
           typename MappingOpT>
-void build_clusters(const raft::device_resources& handle,
+void build_clusters(const raft::resources& handle,
                     const kmeans_balanced_params& params,
                     IdxT dim,
                     const T* dataset,
@@ -712,7 +715,7 @@ void build_clusters(const raft::device_resources& handle,
                     rmm::mr::device_memory_resource* device_memory,
                     const MathT* dataset_norm = nullptr)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   // "randomly" initialize labels
   auto labels_view = raft::make_device_vector_view<LabelT, IdxT>(cluster_labels, n_rows);
@@ -836,7 +839,7 @@ template <typename T,
           typename LabelT,
           typename CounterT,
           typename MappingOpT>
-auto build_fine_clusters(const raft::device_resources& handle,
+auto build_fine_clusters(const raft::resources& handle,
                          const kmeans_balanced_params& params,
                          IdxT dim,
                          const T* dataset_mptr,
@@ -854,7 +857,7 @@ auto build_fine_clusters(const raft::device_resources& handle,
                          rmm::mr::device_memory_resource* managed_memory,
                          rmm::mr::device_memory_resource* device_memory) -> IdxT
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   rmm::device_uvector<IdxT> mc_trainset_ids_buf(mesocluster_size_max, stream, managed_memory);
   rmm::device_uvector<MathT> mc_trainset_buf(mesocluster_size_max * dim, stream, device_memory);
   rmm::device_uvector<MathT> mc_trainset_norm_buf(mesocluster_size_max, stream, device_memory);
@@ -898,7 +901,7 @@ auto build_fine_clusters(const raft::device_resources& handle,
     raft::matrix::gather(mapping_itr, dim, n_rows, mc_trainset_ids, k, mc_trainset, stream);
     if (params.metric == raft::distance::DistanceType::L2Expanded ||
         params.metric == raft::distance::DistanceType::L2SqrtExpanded) {
-      thrust::gather(handle.get_thrust_policy(),
+      thrust::gather(resource::get_thrust_policy(handle),
                      mc_trainset_ids,
                      mc_trainset_ids + k,
                      dataset_norm_mptr,
@@ -922,7 +925,7 @@ auto build_fine_clusters(const raft::device_resources& handle,
                mc_trainset_ccenters.data(),
                fine_clusters_nums[i] * dim,
                stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
     n_clusters_done += fine_clusters_nums[i];
   }
   return n_clusters_done;
@@ -949,7 +952,7 @@ auto build_fine_clusters(const raft::device_resources& handle,
  * @param stream
  */
 template <typename T, typename MathT, typename IdxT, typename MappingOpT>
-void build_hierarchical(const raft::device_resources& handle,
+void build_hierarchical(const raft::resources& handle,
                         const kmeans_balanced_params& params,
                         IdxT dim,
                         const T* dataset,
@@ -958,7 +961,7 @@ void build_hierarchical(const raft::device_resources& handle,
                         IdxT n_clusters,
                         MappingOpT mapping_op)
 {
-  auto stream  = handle.get_stream();
+  auto stream  = resource::get_cuda_stream(handle);
   using LabelT = uint32_t;
 
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
@@ -968,7 +971,7 @@ void build_hierarchical(const raft::device_resources& handle,
   RAFT_LOG_DEBUG("build_hierarchical: n_mesoclusters: %u", n_mesoclusters);
 
   rmm::mr::managed_memory_resource managed_memory;
-  rmm::mr::device_memory_resource* device_memory = handle.get_workspace_resource();
+  rmm::mr::device_memory_resource* device_memory = resource::get_workspace_resource(handle);
   auto [max_minibatch_size, mem_per_row] =
     calc_minibatch_size<MathT>(n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
   auto pool_guard =
@@ -1024,7 +1027,7 @@ void build_hierarchical(const raft::device_resources& handle,
   auto mesocluster_sizes  = mesocluster_sizes_buf.data();
   auto mesocluster_labels = mesocluster_labels_buf.data();
 
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   // build fine clusters
   auto [mesocluster_size_max, fine_clusters_nums_max, fine_clusters_nums, fine_clusters_csum] =
diff --git a/cpp/include/raft/cluster/detail/kmeans_common.cuh b/cpp/include/raft/cluster/detail/kmeans_common.cuh
index cca1cbb6e9..5d56a1d081 100644
--- a/cpp/include/raft/cluster/detail/kmeans_common.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_common.cuh
@@ -20,6 +20,8 @@
 #include <cstdio>
 #include <ctime>
 #include <optional>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <random>
 
 #include <cub/cub.cuh>
@@ -30,11 +32,11 @@
 #include <raft/cluster/kmeans_types.hpp>
 #include <raft/core/cudart_utils.hpp>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/kvp.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/mdarray.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
@@ -89,14 +91,14 @@ struct KeyValueIndexOp {
 
 // Computes the intensity histogram from a sequence of labels
 template <typename SampleIteratorT, typename CounterT, typename IndexT>
-void countLabels(raft::device_resources const& handle,
+void countLabels(raft::resources const& handle,
                  SampleIteratorT labels,
                  CounterT* count,
                  IndexT n_samples,
                  IndexT n_clusters,
                  rmm::device_uvector<char>& workspace)
 {
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   // CUB::DeviceHistogram requires a signed index type
   typedef typename std::make_signed_t<IndexT> CubIndexT;
@@ -130,11 +132,11 @@ void countLabels(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT>
-void checkWeight(raft::device_resources const& handle,
+void checkWeight(raft::resources const& handle,
                  raft::device_vector_view<DataT, IndexT> weight,
                  rmm::device_uvector<char>& workspace)
 {
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto wt_aggr        = raft::make_device_scalar<DataT>(handle, 0);
   auto n_samples      = weight.extent(0);
 
@@ -152,7 +154,7 @@ void checkWeight(raft::device_resources const& handle,
                                        stream));
   DataT wt_sum = 0;
   raft::copy(&wt_sum, wt_aggr.data_handle(), 1, stream);
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   if (wt_sum != n_samples) {
     RAFT_LOG_DEBUG(
@@ -188,14 +190,14 @@ template <typename InputT,
           typename MainOpT,
           typename ReductionOpT,
           typename IndexT = int>
-void computeClusterCost(raft::device_resources const& handle,
+void computeClusterCost(raft::resources const& handle,
                         raft::device_vector_view<InputT, IndexT> minClusterDistance,
                         rmm::device_uvector<char>& workspace,
                         raft::device_scalar_view<OutputT> clusterCost,
                         MainOpT main_op,
                         ReductionOpT reduction_op)
 {
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   cub::TransformInputIterator<OutputT, MainOpT, InputT*> itr(minClusterDistance.data_handle(),
                                                              main_op);
@@ -223,7 +225,7 @@ void computeClusterCost(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT>
-void sampleCentroids(raft::device_resources const& handle,
+void sampleCentroids(raft::resources const& handle,
                      raft::device_matrix_view<const DataT, IndexT> X,
                      raft::device_vector_view<DataT, IndexT> minClusterDistance,
                      raft::device_vector_view<uint8_t, IndexT> isSampleCentroid,
@@ -231,7 +233,7 @@ void sampleCentroids(raft::device_resources const& handle,
                      rmm::device_uvector<DataT>& inRankCp,
                      rmm::device_uvector<char>& workspace)
 {
-  cudaStream_t stream  = handle.get_stream();
+  cudaStream_t stream  = resource::get_cuda_stream(handle);
   auto n_local_samples = X.extent(0);
   auto n_features      = X.extent(1);
 
@@ -262,10 +264,10 @@ void sampleCentroids(raft::device_resources const& handle,
 
   IndexT nPtsSampledInRank = 0;
   raft::copy(&nPtsSampledInRank, nSelected.data_handle(), 1, stream);
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   uint8_t* rawPtr_isSampleCentroid = isSampleCentroid.data_handle();
-  thrust::for_each_n(handle.get_thrust_policy(),
+  thrust::for_each_n(resource::get_thrust_policy(handle),
                      sampledMinClusterDistance.data_handle(),
                      nPtsSampledInRank,
                      [=] __device__(raft::KeyValuePair<ptrdiff_t, DataT> val) {
@@ -287,7 +289,7 @@ void sampleCentroids(raft::device_resources const& handle,
 // calculate pairwise distance between 'dataset[n x d]' and 'centroids[k x d]',
 // result will be stored in 'pairwiseDistance[n x k]'
 template <typename DataT, typename IndexT>
-void pairwise_distance_kmeans(raft::device_resources const& handle,
+void pairwise_distance_kmeans(raft::resources const& handle,
                               raft::device_matrix_view<const DataT, IndexT> X,
                               raft::device_matrix_view<const DataT, IndexT> centroids,
                               raft::device_matrix_view<DataT, IndexT> pairwiseDistance,
@@ -315,13 +317,13 @@ void pairwise_distance_kmeans(raft::device_resources const& handle,
 // shuffle and randomly select 'n_samples_to_gather' from input 'in' and stores
 // in 'out' does not modify the input
 template <typename DataT, typename IndexT>
-void shuffleAndGather(raft::device_resources const& handle,
+void shuffleAndGather(raft::resources const& handle,
                       raft::device_matrix_view<const DataT, IndexT> in,
                       raft::device_matrix_view<DataT, IndexT> out,
                       uint32_t n_samples_to_gather,
                       uint64_t seed)
 {
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = in.extent(0);
   auto n_features     = in.extent(1);
 
@@ -350,7 +352,7 @@ void shuffleAndGather(raft::device_resources const& handle,
 // is the distance between the sample and the 'centroid[key]'
 template <typename DataT, typename IndexT>
 void minClusterAndDistanceCompute(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const DataT, IndexT> X,
   raft::device_matrix_view<const DataT, IndexT> centroids,
   raft::device_vector_view<raft::KeyValuePair<IndexT, DataT>, IndexT> minClusterAndDistance,
@@ -361,7 +363,7 @@ void minClusterAndDistanceCompute(
   int batch_centroids,
   rmm::device_uvector<char>& workspace)
 {
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = centroids.extent(0);
@@ -397,7 +399,7 @@ void minClusterAndDistanceCompute(
 
   raft::KeyValuePair<IndexT, DataT> initial_value(0, std::numeric_limits<DataT>::max());
 
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                minClusterAndDistance.data_handle(),
                minClusterAndDistance.data_handle() + minClusterAndDistance.size(),
                initial_value);
@@ -483,7 +485,7 @@ void minClusterAndDistanceCompute(
 }
 
 template <typename DataT, typename IndexT>
-void minClusterDistanceCompute(raft::device_resources const& handle,
+void minClusterDistanceCompute(raft::resources const& handle,
                                raft::device_matrix_view<const DataT, IndexT> X,
                                raft::device_matrix_view<DataT, IndexT> centroids,
                                raft::device_vector_view<DataT, IndexT> minClusterDistance,
@@ -494,7 +496,7 @@ void minClusterDistanceCompute(raft::device_resources const& handle,
                                int batch_centroids,
                                rmm::device_uvector<char>& workspace)
 {
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = centroids.extent(0);
@@ -525,7 +527,7 @@ void minClusterDistanceCompute(raft::device_resources const& handle,
   auto pairwiseDistance = raft::make_device_matrix_view<DataT, IndexT>(
     L2NormBuf_OR_DistBuf.data(), dataBatchSize, centroidsBatchSize);
 
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                minClusterDistance.data_handle(),
                minClusterDistance.data_handle() + minClusterDistance.size(),
                std::numeric_limits<DataT>::max());
@@ -601,7 +603,7 @@ void minClusterDistanceCompute(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT>
-void countSamplesInCluster(raft::device_resources const& handle,
+void countSamplesInCluster(raft::resources const& handle,
                            const KMeansParams& params,
                            raft::device_matrix_view<const DataT, IndexT> X,
                            raft::device_vector_view<const DataT, IndexT> L2NormX,
@@ -609,7 +611,7 @@ void countSamplesInCluster(raft::device_resources const& handle,
                            rmm::device_uvector<char>& workspace,
                            raft::device_vector_view<DataT, IndexT> sampleCountInCluster)
 {
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   auto n_samples      = X.extent(0);
   auto n_features     = X.extent(1);
   auto n_clusters     = centroids.extent(0);
diff --git a/cpp/include/raft/cluster/detail/kmeans_deprecated.cuh b/cpp/include/raft/cluster/detail/kmeans_deprecated.cuh
index bb1d122a24..5a1479a81f 100644
--- a/cpp/include/raft/cluster/detail/kmeans_deprecated.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans_deprecated.cuh
@@ -25,6 +25,9 @@
 #include <cmath>
 #include <cstdio>
 #include <ctime>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <cuda.h>
 #include <thrust/binary_search.h>
@@ -42,7 +45,7 @@
 #include <thrust/sort.h>
 #include <thrust/transform.h>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/spectral/detail/warn_dbg.hpp>
 #include <raft/spectral/matrix_wrappers.hpp>
@@ -360,7 +363,7 @@ static __global__ void divideCentroids(index_type_t d,
  *  @return Zero if successful. Otherwise non-zero.
  */
 template <typename index_type_t, typename value_type_t>
-static int chooseNewCentroid(raft::device_resources const& handle,
+static int chooseNewCentroid(raft::resources const& handle,
                              index_type_t n,
                              index_type_t d,
                              value_type_t rand,
@@ -375,8 +378,8 @@ static int chooseNewCentroid(raft::device_resources const& handle,
   // Observation vector that is chosen as new centroid
   index_type_t obsIndex;
 
-  auto stream             = handle.get_stream();
-  auto thrust_exec_policy = handle.get_thrust_policy();
+  auto stream             = resource::get_cuda_stream(handle);
+  auto thrust_exec_policy = resource::get_thrust_policy(handle);
 
   // Compute cumulative sum of distances
   thrust::inclusive_scan(thrust_exec_policy,
@@ -457,7 +460,7 @@ static int chooseNewCentroid(raft::device_resources const& handle,
  *  @return Zero if successful. Otherwise non-zero.
  */
 template <typename index_type_t, typename value_type_t>
-static int initializeCentroids(raft::device_resources const& handle,
+static int initializeCentroids(raft::resources const& handle,
                                index_type_t n,
                                index_type_t d,
                                index_type_t k,
@@ -479,8 +482,8 @@ static int initializeCentroids(raft::device_resources const& handle,
   thrust::default_random_engine rng(seed);
   thrust::uniform_real_distribution<value_type_t> uniformDist(0, 1);
 
-  auto stream             = handle.get_stream();
-  auto thrust_exec_policy = handle.get_thrust_policy();
+  auto stream             = resource::get_cuda_stream(handle);
+  auto thrust_exec_policy = resource::get_thrust_policy(handle);
 
   constexpr unsigned grid_lower_bound{65535};
 
@@ -568,7 +571,7 @@ static int initializeCentroids(raft::device_resources const& handle,
  *  @return Zero if successful. Otherwise non-zero.
  */
 template <typename index_type_t, typename value_type_t>
-static int assignCentroids(raft::device_resources const& handle,
+static int assignCentroids(raft::resources const& handle,
                            index_type_t n,
                            index_type_t d,
                            index_type_t k,
@@ -579,8 +582,8 @@ static int assignCentroids(raft::device_resources const& handle,
                            index_type_t* __restrict__ clusterSizes,
                            value_type_t* residual_host)
 {
-  auto stream             = handle.get_stream();
-  auto thrust_exec_policy = handle.get_thrust_policy();
+  auto stream             = resource::get_cuda_stream(handle);
+  auto thrust_exec_policy = resource::get_thrust_policy(handle);
 
   // Compute distance between centroids and observation vectors
   RAFT_CUDA_TRY(cudaMemsetAsync(dists, 0, n * k * sizeof(value_type_t), stream));
@@ -640,7 +643,7 @@ static int assignCentroids(raft::device_resources const& handle,
  *  @return Zero if successful. Otherwise non-zero.
  */
 template <typename index_type_t, typename value_type_t>
-static int updateCentroids(raft::device_resources const& handle,
+static int updateCentroids(raft::resources const& handle,
                            index_type_t n,
                            index_type_t d,
                            index_type_t k,
@@ -661,9 +664,9 @@ static int updateCentroids(raft::device_resources const& handle,
 
   constexpr unsigned grid_lower_bound{65535};
 
-  auto stream             = handle.get_stream();
-  auto cublas_h           = handle.get_cublas_handle();
-  auto thrust_exec_policy = handle.get_thrust_policy();
+  auto stream             = resource::get_cuda_stream(handle);
+  auto cublas_h           = resource::get_cublas_handle(handle);
+  auto thrust_exec_policy = resource::get_thrust_policy(handle);
 
   // Device memory
   thrust::device_ptr<value_type_t> obs_copy(work);
@@ -783,7 +786,7 @@ static int updateCentroids(raft::device_resources const& handle,
  *  @return error flag.
  */
 template <typename index_type_t, typename value_type_t>
-int kmeans(raft::device_resources const& handle,
+int kmeans(raft::resources const& handle,
            index_type_t n,
            index_type_t d,
            index_type_t k,
@@ -819,9 +822,9 @@ int kmeans(raft::device_resources const& handle,
   // Initialization
   // -------------------------------------------------------
 
-  auto stream             = handle.get_stream();
-  auto cublas_h           = handle.get_cublas_handle();
-  auto thrust_exec_policy = handle.get_thrust_policy();
+  auto stream             = resource::get_cuda_stream(handle);
+  auto cublas_h           = resource::get_cublas_handle(handle);
+  auto thrust_exec_policy = resource::get_thrust_policy(handle);
 
   // Trivial cases
   if (k == 1) {
@@ -950,7 +953,7 @@ int kmeans(raft::device_resources const& handle,
  *  @return error flag
  */
 template <typename index_type_t, typename value_type_t>
-int kmeans(raft::device_resources const& handle,
+int kmeans(raft::resources const& handle,
            index_type_t n,
            index_type_t d,
            index_type_t k,
diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh
index 46e31b672e..c4dd74f255 100644
--- a/cpp/include/raft/cluster/detail/mst.cuh
+++ b/cpp/include/raft/cluster/detail/mst.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -67,7 +68,7 @@ void merge_msts(sparse::solver::Graph_COO<value_idx, value_idx, value_t>& coo1,
  */
 template <typename value_idx, typename value_t, typename red_op>
 void connect_knn_graph(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const value_t* X,
   sparse::solver::Graph_COO<value_idx, value_idx, value_t>& msf,
   size_t m,
@@ -76,7 +77,7 @@ void connect_knn_graph(
   red_op reduction_op,
   raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   raft::sparse::COO<value_t, value_idx> connected_edges(stream);
 
@@ -130,7 +131,7 @@ void connect_knn_graph(
  */
 template <typename value_idx, typename value_t, typename red_op>
 void build_sorted_mst(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const value_t* X,
   const value_idx* indptr,
   const value_idx* indices,
@@ -146,7 +147,7 @@ void build_sorted_mst(
   raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded,
   int max_iter                        = 10)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   // We want to have MST initialize colors on first call.
   auto mst_coo = raft::sparse::solver::mst<value_idx, value_idx, value_t, double>(
diff --git a/cpp/include/raft/cluster/detail/single_linkage.cuh b/cpp/include/raft/cluster/detail/single_linkage.cuh
index 473d858827..ddd422a89b 100644
--- a/cpp/include/raft/cluster/detail/single_linkage.cuh
+++ b/cpp/include/raft/cluster/detail/single_linkage.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -49,7 +50,7 @@ static const size_t EMPTY = 0;
  * @param[in] n_clusters number of clusters to assign data samples
  */
 template <typename value_idx, typename value_t, LinkageDistance dist_type>
-void single_linkage(raft::device_resources const& handle,
+void single_linkage(raft::resources const& handle,
                     const value_t* X,
                     size_t m,
                     size_t n,
@@ -60,7 +61,7 @@ void single_linkage(raft::device_resources const& handle,
 {
   ASSERT(n_clusters <= m, "n_clusters must be less than or equal to the number of data points");
 
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   rmm::device_uvector<value_idx> indptr(EMPTY, stream);
   rmm::device_uvector<value_idx> indices(EMPTY, stream);
diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh
index da5f0458ad..d63413e82e 100644
--- a/cpp/include/raft/cluster/kmeans.cuh
+++ b/cpp/include/raft/cluster/kmeans.cuh
@@ -22,6 +22,7 @@
 #include <raft/core/kvp.hpp>
 #include <raft/core/mdarray.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 
 namespace raft::cluster::kmeans {
 
@@ -45,12 +46,12 @@ using KeyValueIndexOp = detail::KeyValueIndexOp<IndexT, DataT>;
  *   k-means++ algorithm.
  *
  * @code{.cpp}
- *   #include <raft/core/device_resources.hpp>
+ *   #include <raft/core/resources.hpp>
  *   #include <raft/cluster/kmeans.cuh>
  *   #include <raft/cluster/kmeans_types.hpp>
  *   using namespace raft::cluster;
  *   ...
- *   raft::raft::device_resources handle;
+ *   raft::resources handle;
  *   raft::cluster::KMeansParams params;
  *   int n_features = 15, inertia, n_iter;
  *   auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);
@@ -84,7 +85,7 @@ using KeyValueIndexOp = detail::KeyValueIndexOp<IndexT, DataT>;
  * @param[out]    n_iter        Number of iterations run.
  */
 template <typename DataT, typename IndexT>
-void fit(raft::device_resources const& handle,
+void fit(raft::resources const& handle,
          const KMeansParams& params,
          raft::device_matrix_view<const DataT, IndexT> X,
          std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -99,12 +100,12 @@ void fit(raft::device_resources const& handle,
  * @brief Predict the closest cluster each sample in X belongs to.
  *
  * @code{.cpp}
- *   #include <raft/core/device_resources.hpp>
+ *   #include <raft/core/resources.hpp>
  *   #include <raft/cluster/kmeans.cuh>
  *   #include <raft/cluster/kmeans_types.hpp>
  *   using namespace raft::cluster;
  *   ...
- *   raft::raft::device_resources handle;
+ *   raft::resources handle;
  *   raft::cluster::KMeansParams params;
  *   int n_features = 15, inertia, n_iter;
  *   auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);
@@ -148,7 +149,7 @@ void fit(raft::device_resources const& handle,
  *                                 their closest cluster center.
  */
 template <typename DataT, typename IndexT>
-void predict(raft::device_resources const& handle,
+void predict(raft::resources const& handle,
              const KMeansParams& params,
              raft::device_matrix_view<const DataT, IndexT> X,
              std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -166,12 +167,12 @@ void predict(raft::device_resources const& handle,
  * in the input.
  *
  * @code{.cpp}
- *   #include <raft/core/device_resources.hpp>
+ *   #include <raft/core/resources.hpp>
  *   #include <raft/cluster/kmeans.cuh>
  *   #include <raft/cluster/kmeans_types.hpp>
  *   using namespace raft::cluster;
  *   ...
- *   raft::raft::device_resources handle;
+ *   raft::resources handle;
  *   raft::cluster::KMeansParams params;
  *   int n_features = 15, inertia, n_iter;
  *   auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);
@@ -211,7 +212,7 @@ void predict(raft::device_resources const& handle,
  * @param[out]    n_iter        Number of iterations run.
  */
 template <typename DataT, typename IndexT>
-void fit_predict(raft::device_resources const& handle,
+void fit_predict(raft::resources const& handle,
                  const KMeansParams& params,
                  raft::device_matrix_view<const DataT, IndexT> X,
                  std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -240,7 +241,7 @@ void fit_predict(raft::device_resources const& handle,
  *                              [dim = n_samples x n_features]
  */
 template <typename DataT, typename IndexT>
-void transform(raft::device_resources const& handle,
+void transform(raft::resources const& handle,
                const KMeansParams& params,
                raft::device_matrix_view<const DataT, IndexT> X,
                raft::device_matrix_view<const DataT, IndexT> centroids,
@@ -250,7 +251,7 @@ void transform(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT>
-void transform(raft::device_resources const& handle,
+void transform(raft::resources const& handle,
                const KMeansParams& params,
                const DataT* X,
                const DataT* centroids,
@@ -303,7 +304,7 @@ void transform(raft::device_resources const& handle,
  * @param tol tolerance for early stopping convergence
  */
 template <typename idx_t, typename value_t>
-void find_k(raft::device_resources const& handle,
+void find_k(raft::resources const& handle,
             raft::device_matrix_view<const value_t, idx_t> X,
             raft::host_scalar_view<idx_t> best_k,
             raft::host_scalar_view<value_t> inertia,
@@ -336,7 +337,7 @@ void find_k(raft::device_resources const& handle,
  *
  */
 template <typename DataT, typename IndexT>
-void sample_centroids(raft::device_resources const& handle,
+void sample_centroids(raft::resources const& handle,
                       raft::device_matrix_view<const DataT, IndexT> X,
                       raft::device_vector_view<DataT, IndexT> minClusterDistance,
                       raft::device_vector_view<std::uint8_t, IndexT> isSampleCentroid,
@@ -363,7 +364,7 @@ void sample_centroids(raft::device_resources const& handle,
  *
  */
 template <typename DataT, typename IndexT, typename ReductionOpT>
-void cluster_cost(raft::device_resources const& handle,
+void cluster_cost(raft::resources const& handle,
                   raft::device_vector_view<DataT, IndexT> minClusterDistance,
                   rmm::device_uvector<char>& workspace,
                   raft::device_scalar_view<DataT> clusterCost,
@@ -389,7 +390,7 @@ void cluster_cost(raft::device_resources const& handle,
  * @param[out] new_centroids: output matrix of updated centroids (size n_clusters, n_features)
  */
 template <typename DataT, typename IndexT, typename LabelsIterator>
-void update_centroids(raft::device_resources const& handle,
+void update_centroids(raft::resources const& handle,
                       raft::device_matrix_view<const DataT, IndexT, row_major> X,
                       raft::device_vector_view<const DataT, IndexT> sample_weights,
                       raft::device_matrix_view<const DataT, IndexT, row_major> centroids,
@@ -400,7 +401,7 @@ void update_centroids(raft::device_resources const& handle,
   // TODO: Passing these into the algorithm doesn't really present much of a benefit
   // because they are being resized anyways.
   // ref https://github.com/rapidsai/raft/issues/930
-  rmm::device_uvector<char> workspace(0, handle.get_stream());
+  rmm::device_uvector<char> workspace(0, resource::get_cuda_stream(handle));
 
   detail::update_centroids<DataT, IndexT>(
     handle, X, sample_weights, centroids, labels, weight_per_cluster, new_centroids, workspace);
@@ -430,7 +431,7 @@ void update_centroids(raft::device_resources const& handle,
  *
  */
 template <typename DataT, typename IndexT>
-void min_cluster_distance(raft::device_resources const& handle,
+void min_cluster_distance(raft::resources const& handle,
                           raft::device_matrix_view<const DataT, IndexT> X,
                           raft::device_matrix_view<DataT, IndexT> centroids,
                           raft::device_vector_view<DataT, IndexT> minClusterDistance,
@@ -481,7 +482,7 @@ void min_cluster_distance(raft::device_resources const& handle,
  */
 template <typename DataT, typename IndexT>
 void min_cluster_and_distance(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const DataT, IndexT> X,
   raft::device_matrix_view<const DataT, IndexT> centroids,
   raft::device_vector_view<raft::KeyValuePair<IndexT, DataT>, IndexT> minClusterAndDistance,
@@ -521,7 +522,7 @@ void min_cluster_and_distance(
  *
  */
 template <typename DataT, typename IndexT>
-void shuffle_and_gather(raft::device_resources const& handle,
+void shuffle_and_gather(raft::resources const& handle,
                         raft::device_matrix_view<const DataT, IndexT> in,
                         raft::device_matrix_view<DataT, IndexT> out,
                         uint32_t n_samples_to_gather,
@@ -550,7 +551,7 @@ void shuffle_and_gather(raft::device_resources const& handle,
  *
  */
 template <typename DataT, typename IndexT>
-void count_samples_in_cluster(raft::device_resources const& handle,
+void count_samples_in_cluster(raft::resources const& handle,
                               const KMeansParams& params,
                               raft::device_matrix_view<const DataT, IndexT> X,
                               raft::device_vector_view<DataT, IndexT> L2NormX,
@@ -580,7 +581,7 @@ void count_samples_in_cluster(raft::device_resources const& handle,
  * @param[in]  workspace             Temporary workspace buffer which can get resized
  */
 template <typename DataT, typename IndexT>
-void init_plus_plus(raft::device_resources const& handle,
+void init_plus_plus(raft::resources const& handle,
                     const KMeansParams& params,
                     raft::device_matrix_view<const DataT, IndexT> X,
                     raft::device_matrix_view<DataT, IndexT> centroids,
@@ -613,7 +614,7 @@ void init_plus_plus(raft::device_resources const& handle,
  * @param[in]     workspace     Temporary workspace buffer which can get resized
  */
 template <typename DataT, typename IndexT>
-void fit_main(raft::device_resources const& handle,
+void fit_main(raft::resources const& handle,
               const KMeansParams& params,
               raft::device_matrix_view<const DataT, IndexT> X,
               raft::device_vector_view<const DataT, IndexT> sample_weights,
@@ -660,7 +661,7 @@ namespace raft::cluster {
  * @param[out]    n_iter        Number of iterations run.
  */
 template <typename DataT, typename IndexT = int>
-void kmeans_fit(raft::device_resources const& handle,
+void kmeans_fit(raft::resources const& handle,
                 const KMeansParams& params,
                 raft::device_matrix_view<const DataT, IndexT> X,
                 std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -672,7 +673,7 @@ void kmeans_fit(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_fit(raft::device_resources const& handle,
+void kmeans_fit(raft::resources const& handle,
                 const KMeansParams& params,
                 const DataT* X,
                 const DataT* sample_weight,
@@ -707,7 +708,7 @@ void kmeans_fit(raft::device_resources const& handle,
  *                                 their closest cluster center.
  */
 template <typename DataT, typename IndexT = int>
-void kmeans_predict(raft::device_resources const& handle,
+void kmeans_predict(raft::resources const& handle,
                     const KMeansParams& params,
                     raft::device_matrix_view<const DataT, IndexT> X,
                     std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -721,7 +722,7 @@ void kmeans_predict(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_predict(raft::device_resources const& handle,
+void kmeans_predict(raft::resources const& handle,
                     const KMeansParams& params,
                     const DataT* X,
                     const DataT* sample_weight,
@@ -772,7 +773,7 @@ void kmeans_predict(raft::device_resources const& handle,
  * @param[out]    n_iter        Number of iterations run.
  */
 template <typename DataT, typename IndexT = int>
-void kmeans_fit_predict(raft::device_resources const& handle,
+void kmeans_fit_predict(raft::resources const& handle,
                         const KMeansParams& params,
                         raft::device_matrix_view<const DataT, IndexT> X,
                         std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
@@ -786,7 +787,7 @@ void kmeans_fit_predict(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_fit_predict(raft::device_resources const& handle,
+void kmeans_fit_predict(raft::resources const& handle,
                         const KMeansParams& params,
                         const DataT* X,
                         const DataT* sample_weight,
@@ -817,7 +818,7 @@ void kmeans_fit_predict(raft::device_resources const& handle,
  *                              [dim = n_samples x n_features]
  */
 template <typename DataT, typename IndexT = int>
-void kmeans_transform(raft::device_resources const& handle,
+void kmeans_transform(raft::resources const& handle,
                       const KMeansParams& params,
                       raft::device_matrix_view<const DataT, IndexT> X,
                       raft::device_matrix_view<const DataT, IndexT> centroids,
@@ -827,7 +828,7 @@ void kmeans_transform(raft::device_resources const& handle,
 }
 
 template <typename DataT, typename IndexT = int>
-void kmeans_transform(raft::device_resources const& handle,
+void kmeans_transform(raft::resources const& handle,
                       const KMeansParams& params,
                       const DataT* X,
                       const DataT* centroids,
@@ -864,7 +865,7 @@ using KeyValueIndexOp = kmeans::KeyValueIndexOp<IndexT, DataT>;
  *
  */
 template <typename DataT, typename IndexT>
-void sampleCentroids(raft::device_resources const& handle,
+void sampleCentroids(raft::resources const& handle,
                      raft::device_matrix_view<const DataT, IndexT> X,
                      raft::device_vector_view<DataT, IndexT> minClusterDistance,
                      raft::device_vector_view<std::uint8_t, IndexT> isSampleCentroid,
@@ -891,7 +892,7 @@ void sampleCentroids(raft::device_resources const& handle,
  *
  */
 template <typename DataT, typename IndexT, typename ReductionOpT>
-void computeClusterCost(raft::device_resources const& handle,
+void computeClusterCost(raft::resources const& handle,
                         raft::device_vector_view<DataT, IndexT> minClusterDistance,
                         rmm::device_uvector<char>& workspace,
                         raft::device_scalar_view<DataT> clusterCost,
@@ -922,7 +923,7 @@ void computeClusterCost(raft::device_resources const& handle,
  *
  */
 template <typename DataT, typename IndexT>
-void minClusterDistanceCompute(raft::device_resources const& handle,
+void minClusterDistanceCompute(raft::resources const& handle,
                                const KMeansParams& params,
                                raft::device_matrix_view<const DataT, IndexT> X,
                                raft::device_matrix_view<DataT, IndexT> centroids,
@@ -969,7 +970,7 @@ void minClusterDistanceCompute(raft::device_resources const& handle,
  */
 template <typename DataT, typename IndexT>
 void minClusterAndDistanceCompute(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const KMeansParams& params,
   raft::device_matrix_view<const DataT, IndexT> X,
   raft::device_matrix_view<const DataT, IndexT> centroids,
@@ -1007,7 +1008,7 @@ void minClusterAndDistanceCompute(
  *
  */
 template <typename DataT, typename IndexT>
-void shuffleAndGather(raft::device_resources const& handle,
+void shuffleAndGather(raft::resources const& handle,
                       raft::device_matrix_view<const DataT, IndexT> in,
                       raft::device_matrix_view<DataT, IndexT> out,
                       uint32_t n_samples_to_gather,
@@ -1036,7 +1037,7 @@ void shuffleAndGather(raft::device_resources const& handle,
  *
  */
 template <typename DataT, typename IndexT>
-void countSamplesInCluster(raft::device_resources const& handle,
+void countSamplesInCluster(raft::resources const& handle,
                            const KMeansParams& params,
                            raft::device_matrix_view<const DataT, IndexT> X,
                            raft::device_vector_view<DataT, IndexT> L2NormX,
@@ -1067,7 +1068,7 @@ void countSamplesInCluster(raft::device_resources const& handle,
  * @param[in]  workspace             Temporary workspace buffer which can get resized
  */
 template <typename DataT, typename IndexT>
-void kmeansPlusPlus(raft::device_resources const& handle,
+void kmeansPlusPlus(raft::resources const& handle,
                     const KMeansParams& params,
                     raft::device_matrix_view<const DataT, IndexT> X,
                     raft::device_matrix_view<DataT, IndexT> centroidsRawData,
@@ -1100,7 +1101,7 @@ void kmeansPlusPlus(raft::device_resources const& handle,
  * @param[in]     workspace     Temporary workspace buffer which can get resized
  */
 template <typename DataT, typename IndexT>
-void kmeans_fit_main(raft::device_resources const& handle,
+void kmeans_fit_main(raft::resources const& handle,
                      const KMeansParams& params,
                      raft::device_matrix_view<const DataT, IndexT> X,
                      raft::device_vector_view<const DataT, IndexT> weight,
diff --git a/cpp/include/raft/cluster/kmeans_balanced.cuh b/cpp/include/raft/cluster/kmeans_balanced.cuh
index 405c7a8018..5c59f1393c 100644
--- a/cpp/include/raft/cluster/kmeans_balanced.cuh
+++ b/cpp/include/raft/cluster/kmeans_balanced.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/device_memory_resource.hpp>
 #include <utility>
 
 #include <raft/cluster/detail/kmeans_balanced.cuh>
@@ -72,7 +73,7 @@ namespace raft::cluster::kmeans_balanced {
  *                        datatype. If DataT == MathT, this must be the identity.
  */
 template <typename DataT, typename MathT, typename IndexT, typename MappingOpT = raft::identity_op>
-void fit(const raft::device_resources& handle,
+void fit(const raft::resources& handle,
          kmeans_balanced_params const& params,
          raft::device_matrix_view<const DataT, IndexT> X,
          raft::device_matrix_view<MathT, IndexT> centroids,
@@ -130,7 +131,7 @@ template <typename DataT,
           typename IndexT,
           typename LabelT,
           typename MappingOpT = raft::identity_op>
-void predict(const raft::device_resources& handle,
+void predict(const raft::resources& handle,
              kmeans_balanced_params const& params,
              raft::device_matrix_view<const DataT, IndexT> X,
              raft::device_matrix_view<const MathT, IndexT> centroids,
@@ -195,7 +196,7 @@ template <typename DataT,
           typename IndexT,
           typename LabelT,
           typename MappingOpT = raft::identity_op>
-void fit_predict(const raft::device_resources& handle,
+void fit_predict(const raft::resources& handle,
                  kmeans_balanced_params const& params,
                  raft::device_matrix_view<const DataT, IndexT> X,
                  raft::device_matrix_view<MathT, IndexT> centroids,
@@ -254,7 +255,7 @@ template <typename DataT,
           typename LabelT,
           typename CounterT,
           typename MappingOpT>
-void build_clusters(const raft::device_resources& handle,
+void build_clusters(const raft::resources& handle,
                     const kmeans_balanced_params& params,
                     raft::device_matrix_view<const DataT, IndexT> X,
                     raft::device_matrix_view<MathT, IndexT> centroids,
@@ -280,7 +281,7 @@ void build_clusters(const raft::device_resources& handle,
                          labels.data_handle(),
                          cluster_sizes.data_handle(),
                          mapping_op,
-                         handle.get_workspace_resource(),
+                         resource::get_workspace_resource(handle),
                          X_norm.has_value() ? X_norm.value().data_handle() : nullptr);
 }
 
@@ -333,7 +334,7 @@ template <typename DataT,
           typename LabelT,
           typename CounterT,
           typename MappingOpT = raft::identity_op>
-void calc_centers_and_sizes(const raft::device_resources& handle,
+void calc_centers_and_sizes(const raft::resources& handle,
                             raft::device_matrix_view<const DataT, IndexT> X,
                             raft::device_vector_view<const LabelT, IndexT> labels,
                             raft::device_matrix_view<MathT, IndexT> centroids,
diff --git a/cpp/include/raft/cluster/kmeans_deprecated.cuh b/cpp/include/raft/cluster/kmeans_deprecated.cuh
index 8e0861ada1..11f964eef5 100644
--- a/cpp/include/raft/cluster/kmeans_deprecated.cuh
+++ b/cpp/include/raft/cluster/kmeans_deprecated.cuh
@@ -46,7 +46,7 @@ namespace cluster {
  *  @return error flag
  */
 template <typename index_type_t, typename value_type_t>
-int kmeans(raft::device_resources const& handle,
+int kmeans(raft::resources const& handle,
            index_type_t n,
            index_type_t d,
            index_type_t k,
diff --git a/cpp/include/raft/cluster/single_linkage.cuh b/cpp/include/raft/cluster/single_linkage.cuh
index 91241b853b..d9eba6edc5 100644
--- a/cpp/include/raft/cluster/single_linkage.cuh
+++ b/cpp/include/raft/cluster/single_linkage.cuh
@@ -50,7 +50,7 @@ namespace raft::cluster {
 template <typename value_idx,
           typename value_t,
           LinkageDistance dist_type = LinkageDistance::KNN_GRAPH>
-void single_linkage(raft::device_resources const& handle,
+void single_linkage(raft::resources const& handle,
                     const value_t* X,
                     size_t m,
                     size_t n,
@@ -87,7 +87,7 @@ constexpr int DEFAULT_CONST_C = 15;
  control of k. The algorithm will set `k = log(n) + c`
  */
 template <typename value_t, typename idx_t, LinkageDistance dist_type = LinkageDistance::KNN_GRAPH>
-void single_linkage(raft::device_resources const& handle,
+void single_linkage(raft::resources const& handle,
                     raft::device_matrix_view<const value_t, idx_t, row_major> X,
                     raft::device_matrix_view<idx_t, idx_t, row_major> dendrogram,
                     raft::device_vector_view<idx_t, idx_t> labels,
diff --git a/cpp/include/raft/comms/comms_test.hpp b/cpp/include/raft/comms/comms_test.hpp
index c61bb32f79..3ceb2942a8 100644
--- a/cpp/include/raft/comms/comms_test.hpp
+++ b/cpp/include/raft/comms/comms_test.hpp
@@ -19,7 +19,7 @@
 #include <raft/comms/comms.hpp>
 #include <raft/comms/detail/test.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace comms {
@@ -31,7 +31,7 @@ namespace comms {
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_allreduce(raft::device_resources const& handle, int root)
+bool test_collective_allreduce(raft::resources const& handle, int root)
 {
   return detail::test_collective_allreduce(handle, root);
 }
@@ -43,7 +43,7 @@ bool test_collective_allreduce(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_broadcast(raft::device_resources const& handle, int root)
+bool test_collective_broadcast(raft::resources const& handle, int root)
 {
   return detail::test_collective_broadcast(handle, root);
 }
@@ -55,7 +55,7 @@ bool test_collective_broadcast(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_reduce(raft::device_resources const& handle, int root)
+bool test_collective_reduce(raft::resources const& handle, int root)
 {
   return detail::test_collective_reduce(handle, root);
 }
@@ -67,7 +67,7 @@ bool test_collective_reduce(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_allgather(raft::device_resources const& handle, int root)
+bool test_collective_allgather(raft::resources const& handle, int root)
 {
   return detail::test_collective_allgather(handle, root);
 }
@@ -79,7 +79,7 @@ bool test_collective_allgather(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_gather(raft::device_resources const& handle, int root)
+bool test_collective_gather(raft::resources const& handle, int root)
 {
   return detail::test_collective_gather(handle, root);
 }
@@ -91,7 +91,7 @@ bool test_collective_gather(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_gatherv(raft::device_resources const& handle, int root)
+bool test_collective_gatherv(raft::resources const& handle, int root)
 {
   return detail::test_collective_gatherv(handle, root);
 }
@@ -103,7 +103,7 @@ bool test_collective_gatherv(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_reducescatter(raft::device_resources const& handle, int root)
+bool test_collective_reducescatter(raft::resources const& handle, int root)
 {
   return detail::test_collective_reducescatter(handle, root);
 }
@@ -115,7 +115,7 @@ bool test_collective_reducescatter(raft::device_resources const& handle, int roo
  *        initialized comms instance.
  * @param[in] numTrials number of iterations of all-to-all messaging to perform
  */
-bool test_pointToPoint_simple_send_recv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_simple_send_recv(raft::resources const& h, int numTrials)
 {
   return detail::test_pointToPoint_simple_send_recv(h, numTrials);
 }
@@ -127,7 +127,7 @@ bool test_pointToPoint_simple_send_recv(raft::device_resources const& h, int num
  *        initialized comms instance.
  * @param numTrials number of iterations of send or receive messaging to perform
  */
-bool test_pointToPoint_device_send_or_recv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_device_send_or_recv(raft::resources const& h, int numTrials)
 {
   return detail::test_pointToPoint_device_send_or_recv(h, numTrials);
 }
@@ -139,7 +139,7 @@ bool test_pointToPoint_device_send_or_recv(raft::device_resources const& h, int
  *        initialized comms instance.
  * @param numTrials number of iterations of send or receive messaging to perform
  */
-bool test_pointToPoint_device_sendrecv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_device_sendrecv(raft::resources const& h, int numTrials)
 {
   return detail::test_pointToPoint_device_sendrecv(h, numTrials);
 }
@@ -151,7 +151,7 @@ bool test_pointToPoint_device_sendrecv(raft::device_resources const& h, int numT
  *        initialized comms instance.
  * @param numTrials number of iterations of send or receive messaging to perform
  */
-bool test_pointToPoint_device_multicast_sendrecv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_device_multicast_sendrecv(raft::resources const& h, int numTrials)
 {
   return detail::test_pointToPoint_device_multicast_sendrecv(h, numTrials);
 }
@@ -163,7 +163,7 @@ bool test_pointToPoint_device_multicast_sendrecv(raft::device_resources const& h
  *        initialized comms instance.
  * @param n_colors number of different colors to test
  */
-bool test_commsplit(raft::device_resources const& h, int n_colors)
+bool test_commsplit(raft::resources const& h, int n_colors)
 {
   return detail::test_commsplit(h, n_colors);
 }
diff --git a/cpp/include/raft/comms/detail/mpi_comms.hpp b/cpp/include/raft/comms/detail/mpi_comms.hpp
index 4062389eea..3342fec973 100644
--- a/cpp/include/raft/comms/detail/mpi_comms.hpp
+++ b/cpp/include/raft/comms/detail/mpi_comms.hpp
@@ -28,8 +28,8 @@
 
 #include <raft/comms/comms.hpp>
 #include <raft/comms/detail/util.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/error.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
diff --git a/cpp/include/raft/comms/detail/std_comms.hpp b/cpp/include/raft/comms/detail/std_comms.hpp
index 0db27f0a45..8b92ed48f7 100644
--- a/cpp/include/raft/comms/detail/std_comms.hpp
+++ b/cpp/include/raft/comms/detail/std_comms.hpp
@@ -20,7 +20,7 @@
 #include <raft/comms/detail/ucp_helper.hpp>
 #include <raft/comms/detail/util.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/include/raft/comms/detail/test.hpp b/cpp/include/raft/comms/detail/test.hpp
index 2b12bf2d2a..876a17de1a 100644
--- a/cpp/include/raft/comms/detail/test.hpp
+++ b/cpp/include/raft/comms/detail/test.hpp
@@ -17,7 +17,9 @@
 #pragma once
 
 #include <raft/comms/comms.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/comms.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -38,13 +40,13 @@ namespace detail {
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_allreduce(raft::device_resources const& handle, int root)
+bool test_collective_allreduce(raft::resources const& handle, int root)
 {
-  comms_t const& communicator = handle.get_comms();
+  comms_t const& communicator = resource::get_comms(handle);
 
   int const send = 1;
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   rmm::device_scalar<int> temp_d(stream);
   RAFT_CUDA_TRY(cudaMemcpyAsync(temp_d.data(), &send, 1, cudaMemcpyHostToDevice, stream));
@@ -53,7 +55,7 @@ bool test_collective_allreduce(raft::device_resources const& handle, int root)
 
   int temp_h = 0;
   RAFT_CUDA_TRY(cudaMemcpyAsync(&temp_h, temp_d.data(), 1, cudaMemcpyDeviceToHost, stream));
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   communicator.barrier();
 
   std::cout << "Clique size: " << communicator.get_size() << std::endl;
@@ -69,13 +71,13 @@ bool test_collective_allreduce(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_broadcast(raft::device_resources const& handle, int root)
+bool test_collective_broadcast(raft::resources const& handle, int root)
 {
-  comms_t const& communicator = handle.get_comms();
+  comms_t const& communicator = resource::get_comms(handle);
 
   int const send = root;
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   rmm::device_scalar<int> temp_d(stream);
 
@@ -88,7 +90,7 @@ bool test_collective_broadcast(raft::device_resources const& handle, int root)
   int temp_h = -1;  // Verify more than one byte is being sent
   RAFT_CUDA_TRY(
     cudaMemcpyAsync(&temp_h, temp_d.data(), sizeof(int), cudaMemcpyDeviceToHost, stream));
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   communicator.barrier();
 
   std::cout << "Clique size: " << communicator.get_size() << std::endl;
@@ -104,13 +106,13 @@ bool test_collective_broadcast(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_reduce(raft::device_resources const& handle, int root)
+bool test_collective_reduce(raft::resources const& handle, int root)
 {
-  comms_t const& communicator = handle.get_comms();
+  comms_t const& communicator = resource::get_comms(handle);
 
   int const send = root;
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   rmm::device_scalar<int> temp_d(stream);
 
@@ -121,7 +123,7 @@ bool test_collective_reduce(raft::device_resources const& handle, int root)
   int temp_h = -1;  // Verify more than one byte is being sent
   RAFT_CUDA_TRY(
     cudaMemcpyAsync(&temp_h, temp_d.data(), sizeof(int), cudaMemcpyDeviceToHost, stream));
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   communicator.barrier();
 
   std::cout << "Clique size: " << communicator.get_size() << std::endl;
@@ -140,13 +142,13 @@ bool test_collective_reduce(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_allgather(raft::device_resources const& handle, int root)
+bool test_collective_allgather(raft::resources const& handle, int root)
 {
-  comms_t const& communicator = handle.get_comms();
+  comms_t const& communicator = resource::get_comms(handle);
 
   int const send = communicator.get_rank();
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   rmm::device_scalar<int> temp_d(stream);
   rmm::device_uvector<int> recv_d(communicator.get_size(), stream);
@@ -158,7 +160,7 @@ bool test_collective_allgather(raft::device_resources const& handle, int root)
   int temp_h[communicator.get_size()];  // Verify more than one byte is being sent
   RAFT_CUDA_TRY(cudaMemcpyAsync(
     &temp_h, recv_d.data(), sizeof(int) * communicator.get_size(), cudaMemcpyDeviceToHost, stream));
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   communicator.barrier();
 
   std::cout << "Clique size: " << communicator.get_size() << std::endl;
@@ -177,13 +179,13 @@ bool test_collective_allgather(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_gather(raft::device_resources const& handle, int root)
+bool test_collective_gather(raft::resources const& handle, int root)
 {
-  comms_t const& communicator = handle.get_comms();
+  comms_t const& communicator = resource::get_comms(handle);
 
   int const send = communicator.get_rank();
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   rmm::device_scalar<int> temp_d(stream);
   rmm::device_uvector<int> recv_d(communicator.get_rank() == root ? communicator.get_size() : 0,
@@ -198,7 +200,7 @@ bool test_collective_gather(raft::device_resources const& handle, int root)
     std::vector<int> temp_h(communicator.get_size(), 0);
     RAFT_CUDA_TRY(cudaMemcpyAsync(
       temp_h.data(), recv_d.data(), sizeof(int) * temp_h.size(), cudaMemcpyDeviceToHost, stream));
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     for (int i = 0; i < communicator.get_size(); i++) {
       if (temp_h[i] != i) return false;
@@ -214,9 +216,9 @@ bool test_collective_gather(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_gatherv(raft::device_resources const& handle, int root)
+bool test_collective_gatherv(raft::resources const& handle, int root)
 {
-  comms_t const& communicator = handle.get_comms();
+  comms_t const& communicator = resource::get_comms(handle);
 
   std::vector<size_t> sendcounts(communicator.get_size());
   std::iota(sendcounts.begin(), sendcounts.end(), size_t{1});
@@ -227,7 +229,7 @@ bool test_collective_gatherv(raft::device_resources const& handle, int root)
     displacements[communicator.get_rank() + 1] - displacements[communicator.get_rank()],
     communicator.get_rank());
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   rmm::device_uvector<int> temp_d(sends.size(), stream);
   rmm::device_uvector<int> recv_d(communicator.get_rank() == root ? displacements.back() : 0,
@@ -253,7 +255,7 @@ bool test_collective_gatherv(raft::device_resources const& handle, int root)
                                   sizeof(int) * displacements.back(),
                                   cudaMemcpyDeviceToHost,
                                   stream));
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     for (int i = 0; i < communicator.get_size(); i++) {
       if (std::count_if(temp_h.begin() + displacements[i],
@@ -273,13 +275,13 @@ bool test_collective_gatherv(raft::device_resources const& handle, int root)
  *        initialized comms instance.
  *  @param[in] root the root rank id
  */
-bool test_collective_reducescatter(raft::device_resources const& handle, int root)
+bool test_collective_reducescatter(raft::resources const& handle, int root)
 {
-  comms_t const& communicator = handle.get_comms();
+  comms_t const& communicator = resource::get_comms(handle);
 
   std::vector<int> sends(communicator.get_size(), 1);
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
 
   rmm::device_uvector<int> temp_d(sends.size(), stream);
   rmm::device_scalar<int> recv_d(stream);
@@ -292,7 +294,7 @@ bool test_collective_reducescatter(raft::device_resources const& handle, int roo
   int temp_h = -1;  // Verify more than one byte is being sent
   RAFT_CUDA_TRY(
     cudaMemcpyAsync(&temp_h, recv_d.data(), sizeof(int), cudaMemcpyDeviceToHost, stream));
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   communicator.barrier();
 
   std::cout << "Clique size: " << communicator.get_size() << std::endl;
@@ -308,9 +310,9 @@ bool test_collective_reducescatter(raft::device_resources const& handle, int roo
  *        initialized comms instance.
  * @param[in] numTrials number of iterations of all-to-all messaging to perform
  */
-bool test_pointToPoint_simple_send_recv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_simple_send_recv(raft::resources const& h, int numTrials)
 {
-  comms_t const& communicator = h.get_comms();
+  comms_t const& communicator = resource::get_comms(h);
   int const rank              = communicator.get_rank();
 
   bool ret = true;
@@ -373,11 +375,11 @@ bool test_pointToPoint_simple_send_recv(raft::device_resources const& h, int num
  *        initialized comms instance.
  * @param numTrials number of iterations of send or receive messaging to perform
  */
-bool test_pointToPoint_device_send_or_recv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_device_send_or_recv(raft::resources const& h, int numTrials)
 {
-  comms_t const& communicator = h.get_comms();
+  comms_t const& communicator = resource::get_comms(h);
   int const rank              = communicator.get_rank();
-  cudaStream_t stream         = h.get_stream();
+  cudaStream_t stream         = resource::get_cuda_stream(h);
 
   bool ret = true;
   for (int i = 0; i < numTrials; i++) {
@@ -415,11 +417,11 @@ bool test_pointToPoint_device_send_or_recv(raft::device_resources const& h, int
  *        initialized comms instance.
  * @param numTrials number of iterations of send or receive messaging to perform
  */
-bool test_pointToPoint_device_sendrecv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_device_sendrecv(raft::resources const& h, int numTrials)
 {
-  comms_t const& communicator = h.get_comms();
+  comms_t const& communicator = resource::get_comms(h);
   int const rank              = communicator.get_rank();
-  cudaStream_t stream         = h.get_stream();
+  cudaStream_t stream         = resource::get_cuda_stream(h);
 
   bool ret = true;
   for (int i = 0; i < numTrials; i++) {
@@ -461,11 +463,11 @@ bool test_pointToPoint_device_sendrecv(raft::device_resources const& h, int numT
  *        initialized comms instance.
  * @param numTrials number of iterations of send or receive messaging to perform
  */
-bool test_pointToPoint_device_multicast_sendrecv(raft::device_resources const& h, int numTrials)
+bool test_pointToPoint_device_multicast_sendrecv(raft::resources const& h, int numTrials)
 {
-  comms_t const& communicator = h.get_comms();
+  comms_t const& communicator = resource::get_comms(h);
   int const rank              = communicator.get_rank();
-  cudaStream_t stream         = h.get_stream();
+  cudaStream_t stream         = resource::get_cuda_stream(h);
 
   bool ret = true;
   for (int i = 0; i < numTrials; i++) {
@@ -502,7 +504,7 @@ bool test_pointToPoint_device_multicast_sendrecv(raft::device_resources const& h
 
     std::vector<int> h_received_data(communicator.get_size());
     raft::update_host(h_received_data.data(), received_data.data(), received_data.size(), stream);
-    h.sync_stream(stream);
+    resource::sync_stream(h, stream);
     for (int i = 0; i < communicator.get_size(); ++i) {
       if (h_received_data[i] != i) { ret = false; }
     }
@@ -520,9 +522,9 @@ bool test_pointToPoint_device_multicast_sendrecv(raft::device_resources const& h
  *        initialized comms instance.
  * @param n_colors number of different colors to test
  */
-bool test_commsplit(raft::device_resources const& h, int n_colors)
+bool test_commsplit(raft::resources const& h, int n_colors)
 {
-  comms_t const& communicator = h.get_comms();
+  comms_t const& communicator = resource::get_comms(h);
   int const rank              = communicator.get_rank();
   int const size              = communicator.get_size();
 
diff --git a/cpp/include/raft/comms/mpi_comms.hpp b/cpp/include/raft/comms/mpi_comms.hpp
index 9076176ea6..bc09c5c622 100644
--- a/cpp/include/raft/comms/mpi_comms.hpp
+++ b/cpp/include/raft/comms/mpi_comms.hpp
@@ -18,6 +18,8 @@
 
 #include <raft/comms/comms.hpp>
 #include <raft/comms/detail/mpi_comms.hpp>
+#include <raft/core/resource/comms.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 
 namespace raft {
 namespace comms {
@@ -40,26 +42,26 @@ using mpi_comms = detail::mpi_comms;
  * #include <raft/core/device_mdarray.hpp>
  *
  * MPI_Comm mpi_comm;
- * raft::raft::device_resources handle;
+ * raft::resources handle;
  *
  * initialize_mpi_comms(&handle, mpi_comm);
  * ...
- * const auto& comm = handle.get_comms();
+ * const auto& comm = resource::get_comms(handle);
  * auto gather_data = raft::make_device_vector<float>(handle, comm.get_size());
  * ...
  * comm.allgather((gather_data.data_handle())[comm.get_rank()],
  *                gather_data.data_handle(),
  *                1,
- *                handle.get_stream());
+ *                resource::get_cuda_stream(handle));
  *
- * comm.sync_stream(handle.get_stream());
+ * comm.sync_stream(resource::get_cuda_stream(handle));
  * @endcode
  */
-inline void initialize_mpi_comms(device_resources* handle, MPI_Comm comm)
+inline void initialize_mpi_comms(resources* handle, MPI_Comm comm)
 {
   auto communicator = std::make_shared<comms_t>(
-    std::unique_ptr<comms_iface>(new mpi_comms(comm, false, handle->get_stream())));
-  handle->set_comms(communicator);
+    std::unique_ptr<comms_iface>(new mpi_comms(comm, false, resource::get_cuda_stream(*handle))));
+  resource::set_comms(*handle, communicator);
 };
 
 /**
diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp
index 6370d4a8e6..165f721708 100644
--- a/cpp/include/raft/comms/std_comms.hpp
+++ b/cpp/include/raft/comms/std_comms.hpp
@@ -16,7 +16,9 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/comms.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 #include <raft/comms/comms.hpp>
 #include <raft/comms/detail/std_comms.hpp>
@@ -39,7 +41,7 @@ using std_comms = detail::std_comms;
  * Factory function to construct a RAFT NCCL communicator and inject it into a
  * RAFT handle.
  *
- * @param handle raft::device_resources for injecting the comms
+ * @param handle raft::resources for injecting the comms
  * @param nccl_comm initialized NCCL communicator to use for collectives
  * @param num_ranks number of ranks in communicator clique
  * @param rank rank of local instance
@@ -49,35 +51,35 @@ using std_comms = detail::std_comms;
  * #include <raft/core/device_mdarray.hpp>
  *
  * ncclComm_t nccl_comm;
- * raft::raft::device_resources handle;
+ * raft::resources handle;
  *
  * build_comms_nccl_only(&handle, nccl_comm, 5, 0);
  * ...
- * const auto& comm = handle.get_comms();
+ * const auto& comm = resource::get_comms(handle);
  * auto gather_data = raft::make_device_vector<float>(handle, comm.get_size());
  * ...
  * comm.allgather((gather_data.data_handle())[comm.get_rank()],
  *                gather_data.data_handle(),
  *                1,
- *                handle.get_stream());
+ *                resource::get_cuda_stream(handle));
  *
- * comm.sync_stream(handle.get_stream());
+ * comm.sync_stream(resource::get_cuda_stream(handle));
  * @endcode
  */
-void build_comms_nccl_only(device_resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank)
+void build_comms_nccl_only(resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank)
 {
-  cudaStream_t stream = handle->get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(*handle);
 
   auto communicator = std::make_shared<comms_t>(
     std::unique_ptr<comms_iface>(new raft::comms::std_comms(nccl_comm, num_ranks, rank, stream)));
-  handle->set_comms(communicator);
+  resource::set_comms(*handle, communicator);
 }
 
 /**
  * Factory function to construct a RAFT NCCL+UCX and inject it into a RAFT
  * handle.
  *
- * @param handle raft::device_resources for injecting the comms
+ * @param handle raft::resources for injecting the comms
  * @param nccl_comm initialized NCCL communicator to use for collectives
  * @param ucp_worker of local process
  *        Note: This is purposefully left as void* so that the ucp_worker_h
@@ -93,29 +95,25 @@ void build_comms_nccl_only(device_resources* handle, ncclComm_t nccl_comm, int n
  * #include <raft/core/device_mdarray.hpp>
  *
  * ncclComm_t nccl_comm;
- * raft::raft::device_resources handle;
+ * raft::resources handle;
  * ucp_worker_h ucp_worker;
  * ucp_ep_h *ucp_endpoints_arr;
  *
  * build_comms_nccl_ucx(&handle, nccl_comm, &ucp_worker, ucp_endpoints_arr, 5, 0);
  * ...
- * const auto& comm = handle.get_comms();
+ * const auto& comm = resource::get_comms(handle);
  * auto gather_data = raft::make_device_vector<float>(handle, comm.get_size());
  * ...
  * comm.allgather((gather_data.data_handle())[comm.get_rank()],
  *                gather_data.data_handle(),
  *                1,
- *                handle.get_stream());
+ *                resource::get_cuda_stream(handle));
  *
- * comm.sync_stream(handle.get_stream());
+ * comm.sync_stream(resource::get_cuda_stream(handle));
  * @endcode
  */
-void build_comms_nccl_ucx(device_resources* handle,
-                          ncclComm_t nccl_comm,
-                          void* ucp_worker,
-                          void* eps,
-                          int num_ranks,
-                          int rank)
+void build_comms_nccl_ucx(
+  resources* handle, ncclComm_t nccl_comm, void* ucp_worker, void* eps, int num_ranks, int rank)
 {
   auto eps_sp = std::make_shared<ucp_ep_h*>(new ucp_ep_h[num_ranks]);
 
@@ -133,12 +131,12 @@ void build_comms_nccl_ucx(device_resources* handle,
     }
   }
 
-  cudaStream_t stream = handle->get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(*handle);
 
   auto communicator =
     std::make_shared<comms_t>(std::unique_ptr<comms_iface>(new raft::comms::std_comms(
       nccl_comm, (ucp_worker_h)ucp_worker, eps_sp, num_ranks, rank, stream)));
-  handle->set_comms(communicator);
+  resource::set_comms(*handle, communicator);
 }
 
 /**
diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
index d0aea4168e..328080da1f 100644
--- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
+++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp
@@ -17,14 +17,15 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resources.hpp>
 
 #include <algorithm>
 #include <complex>
 #include <cstdint>
 #include <cstring>
 #include <iostream>
+#include <map>
 #include <sstream>
 #include <string>
 #include <type_traits>
diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp
index ce016dd5e0..67aa4e12f1 100644
--- a/cpp/include/raft/core/device_coo_matrix.hpp
+++ b/cpp/include/raft/core/device_coo_matrix.hpp
@@ -110,13 +110,13 @@ constexpr bool is_device_coo_sparsity_preserving_v =
  * on the instance once the sparsity is known.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_coo_matrix.hpp>
  *
  * int n_rows = 100000;
  * int n_cols = 10000;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_matrix = raft::make_device_coo_matrix(handle, n_rows, n_cols);
  * ...
  * // compute expected sparsity
@@ -152,13 +152,13 @@ auto make_device_coo_matrix(raft::resources const& handle,
  * be known up front, and cannot be resized later.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_coo_matrix.hpp>
  *
  * int n_rows = 100000;
  * int n_cols = 10000;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_structure = raft::make_device_coordinate_structure(handle, n_rows, n_cols);
  * ...
  * // compute expected sparsity
@@ -189,7 +189,7 @@ auto make_device_coo_matrix(raft::resources const& handle,
  * coo_matrix if sparsity needs to be mutable.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_coo_matrix.hpp>
  *
  * int n_rows = 100000;
@@ -199,7 +199,7 @@ auto make_device_coo_matrix(raft::resources const& handle,
  * // The following pointer is assumed to reference device memory for a size of nnz
  * float* d_elm_ptr = ...;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_structure = raft::make_device_coordinate_structure(handle, n_rows, n_cols, nnz);
  * coo_matrix_view = raft::make_device_coo_matrix_view(handle, d_elm_ptr, coo_structure.view());
  * @endcode
@@ -226,7 +226,7 @@ auto make_device_coo_matrix_view(
  * coo_matrix if sparsity needs to be mutable.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_span.hpp>
  * #include <raft/core/device_coo_matrix.hpp>
  *
@@ -237,7 +237,7 @@ auto make_device_coo_matrix_view(
  * // The following span is assumed to be of size nnz
  * raft::device_span<float> d_elm_ptr;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_structure = raft::make_device_coordinate_structure(handle, n_rows, n_cols, nnz);
  * coo_matrix_view = raft::make_device_coo_matrix_view(handle, d_elm_ptr, coo_structure.view());
  * @endcode
@@ -266,14 +266,14 @@ auto make_device_coo_matrix_view(
  * underlying data arrays.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_coo_matrix.hpp>
  *
  * int n_rows = 100000;
  * int n_cols = 10000;
  * int nnz = 5000;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_structure = raft::make_device_coordinate_structure(handle, n_rows, n_cols, nnz);
  *  * ...
  * // compute expected sparsity
@@ -305,7 +305,7 @@ auto make_device_coordinate_structure(raft::resources const& handle,
  * sparsity is not known up front.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_coo_matrix.hpp>
  *
  * int n_rows = 100000;
@@ -316,7 +316,7 @@ auto make_device_coordinate_structure(raft::resources const& handle,
  * int *rows = ...;
  * int *cols = ...;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_structure = raft::make_device_coordinate_structure_view(handle, rows, cols, n_rows, n_cols,
  * nnz);
  * @endcode
@@ -345,7 +345,7 @@ auto make_device_coordinate_structure_view(
  * sparsity is not known up front.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_coo_matrix.hpp>
  *
  * int n_rows = 100000;
@@ -356,7 +356,7 @@ auto make_device_coordinate_structure_view(
  * raft::device_span<int> rows;
  * raft::device_span<int> cols;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_structure = raft::make_device_coordinate_structure_view(handle, rows, cols, n_rows, n_cols);
  * @endcode
  *
diff --git a/cpp/include/raft/core/device_csr_matrix.hpp b/cpp/include/raft/core/device_csr_matrix.hpp
index 869034e925..1495609d75 100644
--- a/cpp/include/raft/core/device_csr_matrix.hpp
+++ b/cpp/include/raft/core/device_csr_matrix.hpp
@@ -17,9 +17,9 @@
 
 #include <raft/core/csr_matrix.hpp>
 #include <raft/core/device_container_policy.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/device_span.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/core/sparse_types.hpp>
 #include <type_traits>
 
@@ -122,13 +122,13 @@ using device_compressed_structure_view =
  * `resize()` invoked on the instance once the sparsity is known.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_csr_matrix.hpp>
  *
  * int n_rows = 100000;
  * int n_cols = 10000;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * csr_matrix = raft::make_device_csr_matrix(handle, n_rows, n_cols);
  * ...
  * // compute expected sparsity
@@ -151,7 +151,7 @@ template <typename ElementType,
           typename IndptrType,
           typename IndicesType,
           typename NZType = uint64_t>
-auto make_device_csr_matrix(raft::device_resources const& handle,
+auto make_device_csr_matrix(raft::resources const& handle,
                             IndptrType n_rows,
                             IndicesType n_cols,
                             NZType nnz = 0)
@@ -167,13 +167,13 @@ auto make_device_csr_matrix(raft::device_resources const& handle,
  * sparsity, the sparsity must be known up front, and cannot be resized later.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_csr_matrix.hpp>
  *
  * int n_rows = 100000;
  * int n_cols = 10000;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * coo_structure = raft::make_device_compressed_structure(handle, n_rows, n_cols);
  * ...
  * // compute expected sparsity
@@ -195,7 +195,7 @@ template <typename ElementType,
           typename IndicesType,
           typename NZType = uint64_t>
 auto make_device_csr_matrix(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   device_compressed_structure_view<IndptrType, IndicesType, NZType> structure)
 {
   return device_sparsity_preserving_csr_matrix<ElementType, IndptrType, IndicesType, NZType>(
@@ -208,7 +208,7 @@ auto make_device_csr_matrix(
  * coo_matrix if sparsity needs to be mutable.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_csr_matrix.hpp>
  *
  * int n_rows = 100000;
@@ -218,7 +218,7 @@ auto make_device_csr_matrix(
  * // The following pointer is assumed to reference device memory for a size of nnz
  * float* d_elm_ptr = ...;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * csr_structure = raft::make_device_compressed_structure(handle, n_rows, n_cols, nnz);
  * csr_matrix_view = raft::make_device_csr_matrix_view(handle, d_elm_ptr, csr_structure.view());
  * @endcode
@@ -248,7 +248,7 @@ auto make_device_csr_matrix_view(
  * sparsity-owning coo_matrix if sparsity needs to be mutable.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_span.hpp>
  * #include <raft/core/device_csr_matrix.hpp>
  *
@@ -259,7 +259,7 @@ auto make_device_csr_matrix_view(
  * // The following span is assumed to be of size nnz
  * raft::device_span<float> d_elm_ptr;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * csr_structure = raft::make_device_compressed_structure(handle, n_rows, n_cols, nnz);
  * csr_matrix_view = raft::make_device_csr_matrix_view(handle, d_elm_ptr, csr_structure.view());
  * @endcode
@@ -291,14 +291,14 @@ auto make_device_csr_matrix_view(
  * the allocation of the underlying indices array is delayed until `resize(nnz)` is invoked.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_csr_matrix.hpp>
  *
  * int n_rows = 100000;
  * int n_cols = 10000;
  * int nnz = 5000;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * csr_structure = raft::make_device_compressed_structure(handle, n_rows, n_cols, nnz);
  * ...
  * // compute expected sparsity
@@ -316,7 +316,7 @@ auto make_device_csr_matrix_view(
  * @return a sparsity-owning compressed structure instance
  */
 template <typename IndptrType, typename IndicesType, typename NZType = uint64_t>
-auto make_device_compressed_structure(raft::device_resources const& handle,
+auto make_device_compressed_structure(raft::resources const& handle,
                                       IndptrType n_rows,
                                       IndicesType n_cols,
                                       NZType nnz = 0)
@@ -330,7 +330,7 @@ auto make_device_compressed_structure(raft::device_resources const& handle,
  * sparsity is not known up front.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_csr_matrix.hpp>
  *
  * int n_rows = 100000;
@@ -343,7 +343,7 @@ auto make_device_compressed_structure(raft::device_resources const& handle,
  * // The following pointer is assumed to reference device memory of size nnz
  * int *indices = ...;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * csr_structure = raft::make_device_compressed_structure_view(handle, indptr, indices, n_rows,
  * n_cols, nnz);
- * @endcode *
+ * @endcode
@@ -375,7 +375,7 @@ auto make_device_compressed_structure_view(
  * sparsity is not known up front.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_csr_matrix.hpp>
  *
  * int n_rows = 100000;
@@ -388,7 +388,7 @@ auto make_device_compressed_structure_view(
  * // The following device span is assumed to be of size nnz
  * raft::device_span<int> indices;
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  * csr_structure = raft::make_device_compressed_structure_view(handle, indptr, indices, n_rows,
  * n_cols);
  * @endcode
diff --git a/cpp/include/raft/core/device_mdarray.hpp b/cpp/include/raft/core/device_mdarray.hpp
index 2c0cb56910..68273db15c 100644
--- a/cpp/include/raft/core/device_mdarray.hpp
+++ b/cpp/include/raft/core/device_mdarray.hpp
@@ -73,7 +73,7 @@ using device_matrix = device_mdarray<ElementType, matrix_extent<IndexType>, Layo
  * @tparam ElementType the data type of the matrix elements
  * @tparam IndexType the index type of the extents
  * @tparam LayoutPolicy policy for strides and layout ordering
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param exts dimensionality of the array (series of integers)
  * @return raft::device_mdarray
  */
@@ -96,7 +96,7 @@ auto make_device_mdarray(raft::resources const& handle, extents<IndexType, Exten
  * @tparam ElementType the data type of the matrix elements
  * @tparam IndexType the index type of the extents
  * @tparam LayoutPolicy policy for strides and layout ordering
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param mr rmm memory resource used for allocating the memory for the array
  * @param exts dimensionality of the array (series of integers)
  * @return raft::device_mdarray
diff --git a/cpp/include/raft/core/device_resources.hpp b/cpp/include/raft/core/device_resources.hpp
index 1cab36561a..c620a688b9 100644
--- a/cpp/include/raft/core/device_resources.hpp
+++ b/cpp/include/raft/core/device_resources.hpp
@@ -238,7 +238,7 @@ class stream_syncer {
  public:
   explicit stream_syncer(const device_resources& handle) : handle_(handle)
   {
-    handle_.sync_stream();
+    resource::sync_stream(handle_);
   }
   ~stream_syncer()
   {
diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp
index c7350a978c..5ae0886ce9 100644
--- a/cpp/include/raft/core/mdarray.hpp
+++ b/cpp/include/raft/core/mdarray.hpp
@@ -25,11 +25,11 @@
 #include <stddef.h>
 
 #include <raft/core/detail/macros.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_device_accessor.hpp>
 #include <raft/core/mdspan.hpp>
 #include <raft/core/mdspan_types.hpp>
 #include <raft/core/memory_type.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 /**
diff --git a/cpp/include/raft/core/resource/cuda_stream_pool.hpp b/cpp/include/raft/core/resource/cuda_stream_pool.hpp
index dbce75b3a5..7ed356485c 100644
--- a/cpp/include/raft/core/resource/cuda_stream_pool.hpp
+++ b/cpp/include/raft/core/resource/cuda_stream_pool.hpp
@@ -173,6 +173,10 @@ inline void sync_stream_pool(const resources& res, const std::vector<std::size_t
  */
 inline void wait_stream_pool_on_stream(const resources& res)
 {
+  if (!res.has_resource_factory(resource_type::CUDA_STREAM_POOL)) {
+    res.add_resource_factory(std::make_shared<cuda_stream_pool_resource_factory>());
+  }
+
   cudaEvent_t event = detail::get_cuda_stream_sync_event(res);
   RAFT_CUDA_TRY(cudaEventRecord(event, get_cuda_stream(res)));
   for (std::size_t i = 0; i < get_stream_pool_size(res); i++) {
diff --git a/cpp/include/raft/core/resource/thrust_policy.hpp b/cpp/include/raft/core/resource/thrust_policy.hpp
index 1e7441e5e4..78c04ce875 100644
--- a/cpp/include/raft/core/resource/thrust_policy.hpp
+++ b/cpp/include/raft/core/resource/thrust_policy.hpp
@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/resource/resource_types.hpp>
 #include <raft/core/resources.hpp>
 #include <rmm/exec_policy.hpp>
@@ -71,4 +72,4 @@ inline rmm::exec_policy& get_thrust_policy(resources const& res)
  * @}
  */
 
-}  // namespace raft::resource
\ No newline at end of file
+}  // namespace raft::resource
diff --git a/cpp/include/raft/core/serialize.hpp b/cpp/include/raft/core/serialize.hpp
index 05814e2845..b2fef8c6ef 100644
--- a/cpp/include/raft/core/serialize.hpp
+++ b/cpp/include/raft/core/serialize.hpp
@@ -18,8 +18,9 @@
 
 #include <raft/core/detail/mdspan_numpy_serializer.hpp>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 #include <iostream>
 #include <vector>
@@ -32,7 +33,7 @@ namespace raft {
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void serialize_mdspan(
-  const raft::device_resources&,
+  const raft::resources&,
   std::ostream& os,
   const raft::host_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>& obj)
 {
@@ -41,7 +42,7 @@ inline void serialize_mdspan(
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void serialize_mdspan(
-  const raft::device_resources& handle,
+  const raft::resources& handle,
   std::ostream& os,
   const raft::device_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>& obj)
 {
@@ -53,9 +54,9 @@ inline void serialize_mdspan(
   // Copy to host before serializing
   // For contiguous layouts, size() == product of dimensions
   std::vector<typename obj_t::value_type> tmp(obj.size());
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   raft::update_host(tmp.data(), obj.data_handle(), obj.size(), stream);
-  handle.sync_stream();
+  resource::sync_stream(handle);
   using inner_accessor_type = typename obj_t::accessor_type::accessor_type;
   auto tmp_mdspan =
     raft::host_mdspan<ElementType, Extents, LayoutPolicy, raft::host_accessor<inner_accessor_type>>(
@@ -65,7 +66,7 @@ inline void serialize_mdspan(
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void serialize_mdspan(
-  const raft::device_resources&,
+  const raft::resources&,
   std::ostream& os,
   const raft::managed_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>& obj)
 {
@@ -79,7 +80,7 @@ inline void serialize_mdspan(
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void deserialize_mdspan(
-  const raft::device_resources&,
+  const raft::resources&,
   std::istream& is,
   raft::host_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>& obj)
 {
@@ -88,7 +89,7 @@ inline void deserialize_mdspan(
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void deserialize_mdspan(
-  const raft::device_resources& handle,
+  const raft::resources& handle,
   std::istream& is,
   raft::device_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>& obj)
 {
@@ -106,14 +107,14 @@ inline void deserialize_mdspan(
       tmp.data(), obj.extents());
   detail::numpy_serializer::deserialize_host_mdspan(is, tmp_mdspan);
 
-  cudaStream_t stream = handle.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(handle);
   raft::update_device(obj.data_handle(), tmp.data(), obj.size(), stream);
-  handle.sync_stream();
+  resource::sync_stream(handle);
 }
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void deserialize_mdspan(
-  const raft::device_resources& handle,
+  const raft::resources& handle,
   std::istream& is,
   raft::host_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>&& obj)
 {
@@ -122,7 +123,7 @@ inline void deserialize_mdspan(
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void deserialize_mdspan(
-  const raft::device_resources& handle,
+  const raft::resources& handle,
   std::istream& is,
   raft::managed_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>& obj)
 {
@@ -136,7 +137,7 @@ inline void deserialize_mdspan(
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void deserialize_mdspan(
-  const raft::device_resources& handle,
+  const raft::resources& handle,
   std::istream& is,
   raft::managed_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>&& obj)
 {
@@ -145,7 +146,7 @@ inline void deserialize_mdspan(
 
 template <typename ElementType, typename Extents, typename LayoutPolicy, typename AccessorPolicy>
 inline void deserialize_mdspan(
-  const raft::device_resources& handle,
+  const raft::resources& handle,
   std::istream& is,
   raft::device_mdspan<ElementType, Extents, LayoutPolicy, AccessorPolicy>&& obj)
 {
@@ -153,13 +154,13 @@ inline void deserialize_mdspan(
 }
 
 template <typename T>
-inline void serialize_scalar(const raft::device_resources&, std::ostream& os, const T& value)
+inline void serialize_scalar(const raft::resources&, std::ostream& os, const T& value)
 {
   detail::numpy_serializer::serialize_scalar(os, value);
 }
 
 template <typename T>
-inline T deserialize_scalar(const raft::device_resources&, std::istream& is)
+inline T deserialize_scalar(const raft::resources&, std::istream& is)
 {
   return detail::numpy_serializer::deserialize_scalar<T>(is);
 }
diff --git a/cpp/include/raft/core/sparse_types.hpp b/cpp/include/raft/core/sparse_types.hpp
index a14944ed5b..a1432c9eb6 100644
--- a/cpp/include/raft/core/sparse_types.hpp
+++ b/cpp/include/raft/core/sparse_types.hpp
@@ -15,8 +15,8 @@
  */
 #pragma once
 
-#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/core/span.hpp>
 #include <raft/core/sparse_types.hpp>
 
diff --git a/cpp/include/raft/core/temporary_device_buffer.hpp b/cpp/include/raft/core/temporary_device_buffer.hpp
index 4baa7e9597..fcb63f169c 100644
--- a/cpp/include/raft/core/temporary_device_buffer.hpp
+++ b/cpp/include/raft/core/temporary_device_buffer.hpp
@@ -18,6 +18,7 @@
 
 #include "device_mdarray.hpp"
 #include "device_mdspan.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/util/cudart_utils.hpp>
 
@@ -64,17 +65,17 @@ class temporary_device_buffer {
   /**
    * @brief Construct a new temporary device buffer object
    *
-   * @param handle raft::device_resources
+   * @param handle raft::resources
    * @param data input pointer
    * @param extents dimensions of input array
-   * @param write_back if true, any writes to the `view()` of this object will be copid
+   * @param write_back if true, any writes to the `view()` of this object will be copied
    *                   back if the original pointer was in host memory
    */
-  temporary_device_buffer(device_resources const& handle,
+  temporary_device_buffer(resources const& handle,
                           ElementType* data,
                           Extents extents,
                           bool write_back = false)
-    : stream_(handle.get_stream()),
+    : stream_(resource::get_cuda_stream(handle)),
       original_data_(data),
       extents_{extents},
       write_back_(write_back),
@@ -92,7 +93,7 @@ class temporary_device_buffer {
       typename owning_device_buffer::container_policy_type policy{};
 
       owning_device_buffer device_data{handle, layout, policy};
-      raft::copy(device_data.data_handle(), data, length_, handle.get_stream());
+      raft::copy(device_data.data_handle(), data, length_, resource::get_cuda_stream(handle));
       data_ = data_store{std::in_place_index<1>, std::move(device_data)};
     } else {
       data_ = data_store{std::in_place_index<0>, data};
@@ -140,9 +141,9 @@ class temporary_device_buffer {
  * @brief Factory to create a `raft::temporary_device_buffer`
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // Initialize raft::device_mdarray and raft::extents
  * // Can be either raft::device_mdarray or raft::host_mdarray
@@ -157,7 +158,7 @@ class temporary_device_buffer {
  * @tparam LayoutPolicy layout of the input
  * @tparam ContainerPolicy container to be used to own device memory if needed
  * @tparam Extents variadic dimensions for `raft::extents`
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param data input pointer
  * @param extents dimensions of input array
- * @param write_back if true, any writes to the `view()` of this object will be copid
+ * @param write_back if true, any writes to the `view()` of this object will be copied
@@ -169,7 +170,7 @@ template <typename ElementType,
           typename LayoutPolicy                        = layout_c_contiguous,
           template <typename> typename ContainerPolicy = device_uvector_policy,
           size_t... Extents>
-auto make_temporary_device_buffer(raft::device_resources const& handle,
+auto make_temporary_device_buffer(raft::resources const& handle,
                                   ElementType* data,
                                   raft::extents<IndexType, Extents...> extents,
                                   bool write_back = false)
@@ -184,9 +185,9 @@ auto make_temporary_device_buffer(raft::device_resources const& handle,
  *        `write_back=false`
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // Initialize raft::device_mdarray and raft::extents
  * // Can be either raft::device_mdarray or raft::host_mdarray
@@ -201,7 +202,7 @@ auto make_temporary_device_buffer(raft::device_resources const& handle,
  * @tparam LayoutPolicy layout of the input
  * @tparam ContainerPolicy container to be used to own device memory if needed
  * @tparam Extents variadic dimensions for `raft::extents`
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param data input pointer
  * @param extents dimensions of input array
  * @return raft::temporary_device_buffer
@@ -211,7 +212,7 @@ template <typename ElementType,
           typename LayoutPolicy                        = layout_c_contiguous,
           template <typename> typename ContainerPolicy = device_uvector_policy,
           size_t... Extents>
-auto make_readonly_temporary_device_buffer(raft::device_resources const& handle,
+auto make_readonly_temporary_device_buffer(raft::resources const& handle,
                                            ElementType* data,
                                            raft::extents<IndexType, Extents...> extents)
 {
@@ -227,9 +228,9 @@ auto make_readonly_temporary_device_buffer(raft::device_resources const& handle,
  *        `write_back=true`
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // Initialize raft::host_mdarray and raft::extents
  * // Can be either raft::device_mdarray or raft::host_mdarray
@@ -244,7 +245,7 @@ auto make_readonly_temporary_device_buffer(raft::device_resources const& handle,
  * @tparam LayoutPolicy layout of the input
  * @tparam ContainerPolicy container to be used to own device memory if needed
  * @tparam Extents variadic dimensions for `raft::extents`
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param data input pointer
  * @param extents dimensions of input array
  * @return raft::temporary_device_buffer
@@ -255,7 +256,7 @@ template <typename ElementType,
           template <typename> typename ContainerPolicy = device_uvector_policy,
           size_t... Extents,
           typename = std::enable_if_t<not std::is_const_v<ElementType>>>
-auto make_writeback_temporary_device_buffer(raft::device_resources const& handle,
+auto make_writeback_temporary_device_buffer(raft::resources const& handle,
                                             ElementType* data,
                                             raft::extents<IndexType, Extents...> extents)
 {
diff --git a/cpp/include/raft/distance/detail/compress_to_bits.cuh b/cpp/include/raft/distance/detail/compress_to_bits.cuh
index 9b994a873b..fa0df25461 100644
--- a/cpp/include/raft/distance/detail/compress_to_bits.cuh
+++ b/cpp/include/raft/distance/detail/compress_to_bits.cuh
@@ -16,6 +16,7 @@
 #pragma once
 
 #include <raft/core/handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/device_atomics.cuh>
 
@@ -95,11 +96,11 @@ __global__ void compress_to_bits_kernel(
  *                          Note: the division (`/`) is a ceilDiv.
  */
 template <typename T = uint64_t, typename = std::enable_if_t<std::is_integral<T>::value>>
-void compress_to_bits(raft::device_resources const& handle,
+void compress_to_bits(raft::resources const& handle,
                       raft::device_matrix_view<const bool, int, raft::layout_c_contiguous> in,
                       raft::device_matrix_view<T, int, raft::layout_c_contiguous> out)
 {
-  auto stream                    = handle.get_stream();
+  auto stream                    = resource::get_cuda_stream(handle);
   constexpr int bits_per_element = 8 * sizeof(T);
 
   RAFT_EXPECTS(raft::ceildiv(in.extent(0), bits_per_element) == out.extent(0),
diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
index 2154aa560c..7cfc75cd96 100644
--- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
+++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
@@ -17,7 +17,8 @@
 #pragma once
 
 #include <raft/core/device_csr_matrix.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 // #include <raft/sparse/detail/cusparse_wrappers.h>
@@ -71,7 +72,7 @@ class GramMatrixBase {
    * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
    * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
    */
-  void operator()(raft::device_resources const& handle,
+  void operator()(raft::resources const& handle,
                   dense_input_matrix_view_t<math_t> x1,
                   dense_input_matrix_view_t<math_t> x2,
                   dense_output_matrix_view_t<math_t> out,
@@ -91,7 +92,7 @@ class GramMatrixBase {
    * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
    * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
    */
-  void operator()(raft::device_resources const& handle,
+  void operator()(raft::resources const& handle,
                   csr_input_matrix_view_t<math_t> x1,
                   dense_input_matrix_view_t<math_t> x2,
                   dense_output_matrix_view_t<math_t> out,
@@ -111,7 +112,7 @@ class GramMatrixBase {
    * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
    * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
    */
-  void operator()(raft::device_resources const& handle,
+  void operator()(raft::resources const& handle,
                   csr_input_matrix_view_t<math_t> x1,
                   csr_input_matrix_view_t<math_t> x2,
                   dense_output_matrix_view_t<math_t> out,
@@ -132,7 +133,7 @@ class GramMatrixBase {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  virtual void evaluate(raft::device_resources const& handle,
+  virtual void evaluate(raft::resources const& handle,
                         dense_input_matrix_view_t<math_t> x1,
                         dense_input_matrix_view_t<math_t> x2,
                         dense_output_matrix_view_t<math_t> out,
@@ -150,7 +151,7 @@ class GramMatrixBase {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  virtual void evaluate(raft::device_resources const& handle,
+  virtual void evaluate(raft::resources const& handle,
                         csr_input_matrix_view_t<math_t> x1,
                         dense_input_matrix_view_t<math_t> x2,
                         dense_output_matrix_view_t<math_t> out,
@@ -168,7 +169,7 @@ class GramMatrixBase {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  virtual void evaluate(raft::device_resources const& handle,
+  virtual void evaluate(raft::resources const& handle,
                         csr_input_matrix_view_t<math_t> x1,
                         csr_input_matrix_view_t<math_t> x2,
                         dense_output_matrix_view_t<math_t> out,
@@ -345,7 +346,7 @@ class GramMatrixBase {
    * @param [in] x2 dense device matrix view, size [n2*n_cols]
    * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
    */
-  void linear(raft::device_resources const& handle,
+  void linear(raft::resources const& handle,
               dense_input_matrix_view_t<math_t> x1,
               dense_input_matrix_view_t<math_t> x2,
               dense_output_matrix_view_t<math_t> out)
@@ -388,7 +389,7 @@ class GramMatrixBase {
                          &beta,
                          out.data_handle(),
                          ld_out,
-                         handle.get_stream());
+                         resource::get_cuda_stream(handle));
     } else {
       // #TODO: Use mdspan-based API when stride-capable
       // https://github.com/rapidsai/raft/issues/875
@@ -406,7 +407,7 @@ class GramMatrixBase {
                          &beta,
                          out.data_handle(),
                          ld_out,
-                         handle.get_stream());
+                         resource::get_cuda_stream(handle));
     }
   }
 
@@ -421,7 +422,7 @@ class GramMatrixBase {
    * @param [in] x2 dense device matrix view, size [n2*n_cols]
    * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
    */
-  void linear(raft::device_resources const& handle,
+  void linear(raft::resources const& handle,
               csr_input_matrix_view_t<math_t> x1,
               dense_input_matrix_view_t<math_t> x2,
               dense_output_matrix_view_t<math_t> out)
@@ -458,7 +459,7 @@ class GramMatrixBase {
    * @param [in] x2 csr device matrix view, size [n2*n_cols]
    * @param [out] out dense device matrix view for the Gram matrix, size [n1*n2]
    */
-  void linear(raft::device_resources const& handle,
+  void linear(raft::resources const& handle,
               csr_input_matrix_view_t<math_t> x1,
               csr_input_matrix_view_t<math_t> x2,
               dense_output_matrix_view_t<math_t> out)
diff --git a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
index 7ff886c677..234265dbc1 100644
--- a/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
+++ b/cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include "gram_matrix.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/distance/detail/kernels/rbf_fin_op.cuh>
 #include <raft/distance/distance.cuh>
@@ -196,7 +197,7 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 dense_input_matrix_view_t<math_t> x1,
                 dense_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
@@ -206,8 +207,12 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
     bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
     int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
     GramMatrixBase<math_t>::linear(handle, x1, x2, out);
-    applyKernel(
-      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                is_row_major,
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate kernel matrix using polynomial kernel.
@@ -223,7 +228,7 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 csr_input_matrix_view_t<math_t> x1,
                 dense_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
@@ -233,8 +238,12 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
     bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
     int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
     GramMatrixBase<math_t>::linear(handle, x1, x2, out);
-    applyKernel(
-      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                is_row_major,
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate kernel matrix using polynomial kernel.
@@ -250,7 +259,7 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 csr_input_matrix_view_t<math_t> x1,
                 csr_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
@@ -260,8 +269,12 @@ class PolynomialKernel : public GramMatrixBase<math_t> {
     bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
     int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
     GramMatrixBase<math_t>::linear(handle, x1, x2, out);
-    applyKernel(
-      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                is_row_major,
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate the Gram matrix using the legacy interface.
@@ -354,7 +367,7 @@ class TanhKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 dense_input_matrix_view_t<math_t> x1,
                 dense_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
@@ -364,8 +377,12 @@ class TanhKernel : public GramMatrixBase<math_t> {
     bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
     int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
     GramMatrixBase<math_t>::linear(handle, x1, x2, out);
-    applyKernel(
-      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                is_row_major,
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate kernel matrix using tanh kernel.
@@ -381,7 +398,7 @@ class TanhKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 csr_input_matrix_view_t<math_t> x1,
                 dense_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
@@ -391,8 +408,12 @@ class TanhKernel : public GramMatrixBase<math_t> {
     bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
     int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
     GramMatrixBase<math_t>::linear(handle, x1, x2, out);
-    applyKernel(
-      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                is_row_major,
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate kernel matrix using tanh kernel.
@@ -408,7 +429,7 @@ class TanhKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 unused.
    * @param norm_x2 unused.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 csr_input_matrix_view_t<math_t> x1,
                 csr_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
@@ -418,8 +439,12 @@ class TanhKernel : public GramMatrixBase<math_t> {
     bool is_row_major = GramMatrixBase<math_t>::get_is_row_major(out);
     int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
     GramMatrixBase<math_t>::linear(handle, x1, x2, out);
-    applyKernel(
-      out.data_handle(), ld_out, out.extent(0), out.extent(1), is_row_major, handle.get_stream());
+    applyKernel(out.data_handle(),
+                ld_out,
+                out.extent(0),
+                out.extent(1),
+                is_row_major,
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate the Gram matrix using the legacy interface.
@@ -499,7 +524,7 @@ class RBFKernel : public GramMatrixBase<math_t> {
   {
   }
 
-  void matrixRowNormL2(raft::device_resources const& handle,
+  void matrixRowNormL2(raft::resources const& handle,
                        dense_input_matrix_view_t<math_t> matrix,
                        math_t* target)
   {
@@ -513,10 +538,10 @@ class RBFKernel : public GramMatrixBase<math_t> {
                           matrix.extent(0),
                           raft::linalg::NormType::L2Norm,
                           is_row_major,
-                          handle.get_stream());
+                          resource::get_cuda_stream(handle));
   }
 
-  void matrixRowNormL2(raft::device_resources const& handle,
+  void matrixRowNormL2(raft::resources const& handle,
                        csr_input_matrix_view_t<math_t> matrix,
                        math_t* target)
   {
@@ -543,14 +568,14 @@ class RBFKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
    * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 dense_input_matrix_view_t<math_t> x1,
                 dense_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
                 math_t* norm_x1,
                 math_t* norm_x2)
   {
-    cudaStream_t stream = handle.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     // lazy compute norms if not given
     rmm::device_uvector<math_t> tmp_norm_x1(0, stream);
@@ -577,7 +602,7 @@ class RBFKernel : public GramMatrixBase<math_t> {
                 norm_x1,
                 norm_x2,
                 is_row_major,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate kernel matrix using RBF kernel.
@@ -593,14 +618,14 @@ class RBFKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
    * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 csr_input_matrix_view_t<math_t> x1,
                 dense_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
                 math_t* norm_x1,
                 math_t* norm_x2)
   {
-    cudaStream_t stream = handle.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     // lazy compute norms if not given
     rmm::device_uvector<math_t> tmp_norm_x1(0, stream);
@@ -627,7 +652,7 @@ class RBFKernel : public GramMatrixBase<math_t> {
                 norm_x1,
                 norm_x2,
                 is_row_major,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate kernel matrix using RBF kernel.
@@ -643,14 +668,14 @@ class RBFKernel : public GramMatrixBase<math_t> {
    * @param norm_x1 optional L2-norm of x1's rows for computation within RBF.
    * @param norm_x2 optional L2-norm of x2's rows for computation within RBF.
    */
-  void evaluate(raft::device_resources const& handle,
+  void evaluate(raft::resources const& handle,
                 csr_input_matrix_view_t<math_t> x1,
                 csr_input_matrix_view_t<math_t> x2,
                 dense_output_matrix_view_t<math_t> out,
                 math_t* norm_x1,
                 math_t* norm_x2)
   {
-    cudaStream_t stream = handle.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     // lazy compute norms if not given
     rmm::device_uvector<math_t> tmp_norm_x1(0, stream);
@@ -677,7 +702,7 @@ class RBFKernel : public GramMatrixBase<math_t> {
                 norm_x1,
                 norm_x2,
                 is_row_major,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
   }
 
   /** Evaluate the Gram matrix using the legacy interface.
@@ -720,12 +745,16 @@ class RBFKernel : public GramMatrixBase<math_t> {
     using index_t = int64_t;
 
     rbf_fin_op fin_op{gain};
+
+    raft::resources handle;
+    resource::set_cuda_stream(handle, stream);
+
     raft::distance::distance<raft::distance::DistanceType::L2Unexpanded,
                              math_t,
                              math_t,
                              math_t,
                              decltype(fin_op),
-                             index_t>(device_resources(stream),
+                             index_t>(handle,
                                       const_cast<math_t*>(x1),
                                       const_cast<math_t*>(x2),
                                       out,
diff --git a/cpp/include/raft/distance/detail/masked_nn.cuh b/cpp/include/raft/distance/detail/masked_nn.cuh
index 1cf7188b06..0e13783c19 100644
--- a/cpp/include/raft/distance/detail/masked_nn.cuh
+++ b/cpp/include/raft/distance/detail/masked_nn.cuh
@@ -17,6 +17,8 @@
 #pragma once
 
 #include <limits>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
 #include <stdint.h>
 
 #include <raft/distance/detail/compress_to_bits.cuh>
@@ -230,7 +232,7 @@ __global__ __launch_bounds__(P::Nthreads, 2) void masked_l2_nn_kernel(OutT* min,
  *
  */
 template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT, typename KVPReduceOpT>
-void masked_l2_nn_impl(raft::device_resources const& handle,
+void masked_l2_nn_impl(raft::resources const& handle,
                        OutT* out,
                        const DataT* x,
                        const DataT* y,
@@ -253,8 +255,8 @@ void masked_l2_nn_impl(raft::device_resources const& handle,
 
   // Get stream and workspace memory resource
   rmm::mr::device_memory_resource* ws_mr =
-    dynamic_cast<rmm::mr::device_memory_resource*>(handle.get_workspace_resource());
-  auto stream = handle.get_stream();
+    dynamic_cast<rmm::mr::device_memory_resource*>(resource::get_workspace_resource(handle));
+  auto stream = resource::get_cuda_stream(handle);
 
   // Acquire temporary buffers and initialize to zero:
   // 1) Adjacency matrix bitfield
diff --git a/cpp/include/raft/distance/distance-inl.cuh b/cpp/include/raft/distance/distance-inl.cuh
index 3399443765..d17e5767b9 100644
--- a/cpp/include/raft/distance/distance-inl.cuh
+++ b/cpp/include/raft/distance/distance-inl.cuh
@@ -357,12 +357,12 @@ void pairwise_distance(raft::resources const& handle,
  *
  * Usage example:
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  * #include <raft/core/device_mdarray.hpp>
  * #include <raft/random/make_blobs.cuh>
  * #include <raft/distance/distance.cuh>
  *
- * raft::raft::device_resources handle;
+ * raft::resources handle;
  * int n_samples = 5000;
  * int n_features = 50;
  *
diff --git a/cpp/include/raft/distance/fused_l2_nn-ext.cuh b/cpp/include/raft/distance/fused_l2_nn-ext.cuh
index 05732c1f3f..c99c1eb015 100644
--- a/cpp/include/raft/distance/fused_l2_nn-ext.cuh
+++ b/cpp/include/raft/distance/fused_l2_nn-ext.cuh
@@ -17,8 +17,8 @@
 #pragma once
 
 #include <cstdint>                                // int64_t
-#include <raft/core/device_resources.hpp>         // raft::device_resources
 #include <raft/core/kvp.hpp>                      // raft::KeyValuePair
+#include <raft/core/resources.hpp>                // raft::resources
 #include <raft/distance/fused_l2_nn_helpers.cuh>  // include initialize and reduce operations
 #include <raft/util/raft_explicit.hpp>            // RAFT_EXPLICIT
 
diff --git a/cpp/include/raft/distance/fused_l2_nn-inl.cuh b/cpp/include/raft/distance/fused_l2_nn-inl.cuh
index 698d287f87..17373e3bcc 100644
--- a/cpp/include/raft/distance/fused_l2_nn-inl.cuh
+++ b/cpp/include/raft/distance/fused_l2_nn-inl.cuh
@@ -21,7 +21,7 @@
 
 #include <cub/cub.cuh>
 #include <limits>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/detail/fused_l2_nn.cuh>
 #include <raft/distance/fused_l2_nn_helpers.cuh>
 #include <raft/linalg/contractions.cuh>
diff --git a/cpp/include/raft/distance/fused_l2_nn_helpers.cuh b/cpp/include/raft/distance/fused_l2_nn_helpers.cuh
index 1bcd7d8dba..996f696ef6 100644
--- a/cpp/include/raft/distance/fused_l2_nn_helpers.cuh
+++ b/cpp/include/raft/distance/fused_l2_nn_helpers.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/detail/fused_l2_nn.cuh>
 
 namespace raft::distance {
@@ -40,10 +41,10 @@ using MinReduceOp = detail::MinReduceOpImpl<LabelT, DataT>;
  * Initialize array using init value from reduction op
  */
 template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT>
-void initialize(
-  raft::device_resources const& handle, OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp)
+void initialize(raft::resources const& handle, OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp)
 {
-  detail::initialize<DataT, OutT, IdxT, ReduceOpT>(min, m, maxVal, redOp, handle.get_stream());
+  detail::initialize<DataT, OutT, IdxT, ReduceOpT>(
+    min, m, maxVal, redOp, resource::get_cuda_stream(handle));
 }
 
 }  // namespace raft::distance
diff --git a/cpp/include/raft/distance/masked_nn.cuh b/cpp/include/raft/distance/masked_nn.cuh
index 772e9de134..33a6c0456d 100644
--- a/cpp/include/raft/distance/masked_nn.cuh
+++ b/cpp/include/raft/distance/masked_nn.cuh
@@ -145,7 +145,7 @@ struct masked_l2_nn_params {
  *                           (on device)
  */
 template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT, typename KVPReduceOpT>
-void masked_l2_nn(raft::device_resources const& handle,
+void masked_l2_nn(raft::resources const& handle,
                   raft::distance::masked_l2_nn_params<ReduceOpT, KVPReduceOpT> params,
                   raft::device_matrix_view<const DataT, IdxT, raft::layout_c_contiguous> x,
                   raft::device_matrix_view<const DataT, IdxT, raft::layout_c_contiguous> y,
diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh
index c19f491319..30f4a2d167 100644
--- a/cpp/include/raft/linalg/add.cuh
+++ b/cpp/include/raft/linalg/add.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/add.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
@@ -95,7 +96,7 @@ void addDevScalar(
  * @brief Elementwise add operation
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in1    First Input
  * @param[in] in2    Second Input
  * @param[out] out    Output
@@ -104,7 +105,7 @@ template <typename InType,
           typename OutType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void add(raft::device_resources const& handle, InType in1, InType in2, OutType out)
+void add(raft::resources const& handle, InType in1, InType in2, OutType out)
 {
   using in_value_t  = typename InType::value_type;
   using out_value_t = typename OutType::value_type;
@@ -120,13 +121,13 @@ void add(raft::device_resources const& handle, InType in1, InType in2, OutType o
                                                 in1.data_handle(),
                                                 in2.data_handle(),
                                                 static_cast<std::uint32_t>(out.size()),
-                                                handle.get_stream());
+                                                resource::get_cuda_stream(handle));
   } else {
     add<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                 in1.data_handle(),
                                                 in2.data_handle(),
                                                 static_cast<std::uint64_t>(out.size()),
-                                                handle.get_stream());
+                                                resource::get_cuda_stream(handle));
   }
 }
 
@@ -135,7 +136,7 @@ void add(raft::device_resources const& handle, InType in1, InType in2, OutType o
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
  * @tparam ScalarIdxType Index Type of scalar
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in    Input
  * @param[in] scalar    raft::device_scalar_view
  * @param[in] out    Output
@@ -145,7 +146,7 @@ template <typename InType,
           typename ScalarIdxType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void add_scalar(raft::device_resources const& handle,
+void add_scalar(raft::resources const& handle,
                 InType in,
                 OutType out,
                 raft::device_scalar_view<const typename InType::value_type, ScalarIdxType> scalar)
@@ -162,13 +163,13 @@ void add_scalar(raft::device_resources const& handle,
                                                          in.data_handle(),
                                                          scalar.data_handle(),
                                                          static_cast<std::uint32_t>(out.size()),
-                                                         handle.get_stream());
+                                                         resource::get_cuda_stream(handle));
   } else {
     addDevScalar<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                          in.data_handle(),
                                                          scalar.data_handle(),
                                                          static_cast<std::uint64_t>(out.size()),
-                                                         handle.get_stream());
+                                                         resource::get_cuda_stream(handle));
   }
 }
 
@@ -177,7 +178,7 @@ void add_scalar(raft::device_resources const& handle,
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
  * @tparam ScalarIdxType Index Type of scalar
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in    Input
  * @param[in] scalar    raft::host_scalar_view
  * @param[in] out    Output
@@ -187,7 +188,7 @@ template <typename InType,
           typename ScalarIdxType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void add_scalar(raft::device_resources const& handle,
+void add_scalar(raft::resources const& handle,
                 const InType in,
                 OutType out,
                 raft::host_scalar_view<const typename InType::value_type, ScalarIdxType> scalar)
@@ -204,13 +205,13 @@ void add_scalar(raft::device_resources const& handle,
                                                       in.data_handle(),
                                                       *scalar.data_handle(),
                                                       static_cast<std::uint32_t>(out.size()),
-                                                      handle.get_stream());
+                                                      resource::get_cuda_stream(handle));
   } else {
     addScalar<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                       in.data_handle(),
                                                       *scalar.data_handle(),
                                                       static_cast<std::uint64_t>(out.size()),
-                                                      handle.get_stream());
+                                                      resource::get_cuda_stream(handle));
   }
 }
 
diff --git a/cpp/include/raft/linalg/axpy.cuh b/cpp/include/raft/linalg/axpy.cuh
index 9b3af73234..2c901b45da 100644
--- a/cpp/include/raft/linalg/axpy.cuh
+++ b/cpp/include/raft/linalg/axpy.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/axpy.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
@@ -41,7 +42,7 @@ namespace raft::linalg {
  * @param [in] stream
  */
 template <typename T, bool DevicePointerMode = false>
-void axpy(raft::device_resources const& handle,
+void axpy(raft::resources const& handle,
           const int n,
           const T* alpha,
           const T* x,
@@ -62,7 +63,7 @@ void axpy(raft::device_resources const& handle,
  * @brief axpy function
  *  It computes the following equation: y = alpha * x + y
  *
- * @param [in] handle raft::device_resources
+ * @param [in] handle raft::resources
  * @param [in] alpha raft::device_scalar_view
  * @param [in] x Input vector
  * @param [inout] y Output vector
@@ -72,7 +73,7 @@ template <typename ElementType,
           typename InLayoutPolicy,
           typename OutLayoutPolicy,
           typename ScalarIdxType>
-void axpy(raft::device_resources const& handle,
+void axpy(raft::resources const& handle,
           raft::device_scalar_view<const ElementType, ScalarIdxType> alpha,
           raft::device_vector_view<const ElementType, IndexType, InLayoutPolicy> x,
           raft::device_vector_view<ElementType, IndexType, OutLayoutPolicy> y)
@@ -86,13 +87,13 @@ void axpy(raft::device_resources const& handle,
                           x.stride(0),
                           y.data_handle(),
                           y.stride(0),
-                          handle.get_stream());
+                          resource::get_cuda_stream(handle));
 }
 
 /**
  * @brief axpy function
  *  It computes the following equation: y = alpha * x + y
- * @param [in] handle raft::device_resources
+ * @param [in] handle raft::resources
  * @param [in] alpha raft::device_scalar_view
  * @param [in] x Input vector
  * @param [inout] y Output vector
@@ -102,7 +103,7 @@ template <typename ElementType,
           typename InLayoutPolicy,
           typename OutLayoutPolicy,
           typename ScalarIdxType>
-void axpy(raft::device_resources const& handle,
+void axpy(raft::resources const& handle,
           raft::host_scalar_view<const ElementType, ScalarIdxType> alpha,
           raft::device_vector_view<const ElementType, IndexType, InLayoutPolicy> x,
           raft::device_vector_view<ElementType, IndexType, OutLayoutPolicy> y)
@@ -116,7 +117,7 @@ void axpy(raft::device_resources const& handle,
                            x.stride(0),
                            y.data_handle(),
                            y.stride(0),
-                           handle.get_stream());
+                           resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group axpy
diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh
index 88c49d1f42..f6889e959b 100644
--- a/cpp/include/raft/linalg/binary_op.cuh
+++ b/cpp/include/raft/linalg/binary_op.cuh
@@ -19,7 +19,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/map.cuh>
 
 namespace raft {
@@ -62,7 +62,7 @@ void binaryOp(
  * @tparam InType Input Type raft::device_mdspan
  * @tparam Lambda the device-lambda performing the actual operation
  * @tparam OutType Output Type raft::device_mdspan
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in1 First input
  * @param[in] in2 Second input
  * @param[out] out Output
@@ -75,7 +75,7 @@ template <typename InType,
           typename OutType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void binary_op(raft::device_resources const& handle, InType in1, InType in2, OutType out, Lambda op)
+void binary_op(raft::resources const& handle, InType in1, InType in2, OutType out, Lambda op)
 {
   return map(handle, in1, in2, out, op);
 }
diff --git a/cpp/include/raft/linalg/cholesky_r1_update.cuh b/cpp/include/raft/linalg/cholesky_r1_update.cuh
index e10f43653b..5c345028f2 100644
--- a/cpp/include/raft/linalg/cholesky_r1_update.cuh
+++ b/cpp/include/raft/linalg/cholesky_r1_update.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/cholesky_r1_update.cuh"
+#include <raft/core/resource/cublas_handle.hpp>
 
 namespace raft {
 namespace linalg {
@@ -72,7 +73,7 @@ namespace linalg {
  *   // Calculate a new row/column of matrix A into A_new
  *   // ...
  *   // Copy new row to L[rank-1,:]
- *   RAFT_CUBLAS_TRY(cublasCopy(handle.get_cublas_handle(), n - 1, A_new, 1,
+ *   RAFT_CUBLAS_TRY(cublasCopy(resource::get_cublas_handle(handle), n - 1, A_new, 1,
  *                           L + n - 1, ld_L, stream));
  *   // Update Cholesky factorization
  *   raft::linalg::choleskyRank1Update(
@@ -121,7 +122,7 @@ namespace linalg {
  *    conditioned systems. Negative values mean no regularizaton.
  */
 template <typename math_t>
-void choleskyRank1Update(raft::device_resources const& handle,
+void choleskyRank1Update(raft::resources const& handle,
                          math_t* L,
                          int n,
                          int ld,
diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh
index 48c121c359..5609656234 100644
--- a/cpp/include/raft/linalg/coalesced_reduction.cuh
+++ b/cpp/include/raft/linalg/coalesced_reduction.cuh
@@ -19,10 +19,11 @@
 #pragma once
 
 #include "detail/coalesced_reduction.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace linalg {
@@ -101,7 +102,7 @@ void coalescedReduction(OutType* dots,
  * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm)
  * It must be a 'callable' supporting the following input and output:
  * <pre>OutType (*FinalLambda)(OutType);</pre>
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param[in] data Input of type raft::device_matrix_view
  * @param[out] dots Output of type raft::device_matrix_view
  * @param[in] init initial value to use for the reduction
@@ -117,7 +118,7 @@ template <typename InValueType,
           typename MainLambda   = raft::identity_op,
           typename ReduceLambda = raft::add_op,
           typename FinalLambda  = raft::identity_op>
-void coalesced_reduction(raft::device_resources const& handle,
+void coalesced_reduction(raft::resources const& handle,
                          raft::device_matrix_view<const InValueType, IdxType, LayoutPolicy> data,
                          raft::device_vector_view<OutValueType, IdxType> dots,
                          OutValueType init,
@@ -135,7 +136,7 @@ void coalesced_reduction(raft::device_resources const& handle,
                        data.extent(1),
                        data.extent(0),
                        init,
-                       handle.get_stream(),
+                       resource::get_cuda_stream(handle),
                        inplace,
                        main_op,
                        reduce_op,
@@ -149,7 +150,7 @@ void coalesced_reduction(raft::device_resources const& handle,
                        data.extent(0),
                        data.extent(1),
                        init,
-                       handle.get_stream(),
+                       resource::get_cuda_stream(handle),
                        inplace,
                        main_op,
                        reduce_op,
diff --git a/cpp/include/raft/linalg/detail/axpy.cuh b/cpp/include/raft/linalg/detail/axpy.cuh
index 5747e840c4..8dfeab1118 100644
--- a/cpp/include/raft/linalg/detail/axpy.cuh
+++ b/cpp/include/raft/linalg/detail/axpy.cuh
@@ -17,15 +17,16 @@
 #pragma once
 
 #include <cublas_v2.h>
+#include <raft/core/resource/cublas_handle.hpp>
 
 #include "cublas_wrappers.hpp"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::linalg::detail {
 
 template <typename T, bool DevicePointerMode = false>
-void axpy(raft::device_resources const& handle,
+void axpy(raft::resources const& handle,
           const int n,
           const T* alpha,
           const T* x,
@@ -34,7 +35,7 @@ void axpy(raft::device_resources const& handle,
           const int incy,
           cudaStream_t stream)
 {
-  auto cublas_h = handle.get_cublas_handle();
+  auto cublas_h = resource::get_cublas_handle(handle);
   cublas_device_pointer_mode<DevicePointerMode> pmode(cublas_h);
   RAFT_CUBLAS_TRY(cublasaxpy(cublas_h, n, alpha, x, incx, y, incy, stream));
 }
diff --git a/cpp/include/raft/linalg/detail/cholesky_r1_update.cuh b/cpp/include/raft/linalg/detail/cholesky_r1_update.cuh
index afa9155753..34d6bf01ee 100644
--- a/cpp/include/raft/linalg/detail/cholesky_r1_update.cuh
+++ b/cpp/include/raft/linalg/detail/cholesky_r1_update.cuh
@@ -18,7 +18,9 @@
 
 #include "cublas_wrappers.hpp"
 #include "cusolver_wrappers.hpp"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/binary_op.cuh>
 
 namespace raft {
@@ -26,7 +28,7 @@ namespace linalg {
 namespace detail {
 
 template <typename math_t>
-void choleskyRank1Update(raft::device_resources const& handle,
+void choleskyRank1Update(raft::resources const& handle,
                          math_t* L,
                          int n,
                          int ld,
@@ -75,13 +77,14 @@ void choleskyRank1Update(raft::device_resources const& handle,
     // contiguous. We copy elements from A_row to a contiguous workspace A_new.
     A_row = L + n - 1;
     A_new = reinterpret_cast<math_t*>(workspace);
-    RAFT_CUBLAS_TRY(cublasCopy(handle.get_cublas_handle(), n - 1, A_row, ld, A_new, 1, stream));
+    RAFT_CUBLAS_TRY(
+      cublasCopy(resource::get_cublas_handle(handle), n - 1, A_row, ld, A_new, 1, stream));
   }
   cublasOperation_t op = (uplo == CUBLAS_FILL_MODE_UPPER) ? CUBLAS_OP_T : CUBLAS_OP_N;
   if (n > 1) {
     // Calculate L_12 = x by solving equation L_11 x = A_12
     math_t alpha = 1;
-    RAFT_CUBLAS_TRY(cublastrsm(handle.get_cublas_handle(),
+    RAFT_CUBLAS_TRY(cublastrsm(resource::get_cublas_handle(handle),
                                CUBLAS_SIDE_LEFT,
                                uplo,
                                op,
@@ -96,11 +99,13 @@ void choleskyRank1Update(raft::device_resources const& handle,
                                stream));
 
     // A_new now stores L_12, we calculate s = L_12 * L_12
-    RAFT_CUBLAS_TRY(cublasdot(handle.get_cublas_handle(), n - 1, A_new, 1, A_new, 1, s, stream));
+    RAFT_CUBLAS_TRY(
+      cublasdot(resource::get_cublas_handle(handle), n - 1, A_new, 1, A_new, 1, s, stream));
 
     if (uplo == CUBLAS_FILL_MODE_LOWER) {
       // Copy back the L_12 elements as the n-th row of L
-      RAFT_CUBLAS_TRY(cublasCopy(handle.get_cublas_handle(), n - 1, A_new, 1, A_row, ld, stream));
+      RAFT_CUBLAS_TRY(
+        cublasCopy(resource::get_cublas_handle(handle), n - 1, A_new, 1, A_row, ld, stream));
     }
   } else {  // n == 1 case
     RAFT_CUDA_TRY(cudaMemsetAsync(s, 0, sizeof(math_t), stream));
@@ -111,7 +116,7 @@ void choleskyRank1Update(raft::device_resources const& handle,
   math_t L_22_host;
   raft::update_host(&s_host, s, 1, stream);
   raft::update_host(&L_22_host, L_22, 1, stream);  // L_22 stores A_22
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   L_22_host = std::sqrt(L_22_host - s_host);
 
   // Check for numeric error with sqrt. If the matrix is not positive definite or
diff --git a/cpp/include/raft/linalg/detail/eig.cuh b/cpp/include/raft/linalg/detail/eig.cuh
index 7896136631..c9f6c3c040 100644
--- a/cpp/include/raft/linalg/detail/eig.cuh
+++ b/cpp/include/raft/linalg/detail/eig.cuh
@@ -18,7 +18,8 @@
 
 #include "cusolver_wrappers.hpp"
 #include <cuda_runtime_api.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cusolver_dn_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/copy.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_scalar.hpp>
@@ -29,7 +30,7 @@ namespace linalg {
 namespace detail {
 
 template <typename math_t>
-void eigDC_legacy(raft::device_resources const& handle,
+void eigDC_legacy(raft::resources const& handle,
                   const math_t* in,
                   std::size_t n_rows,
                   std::size_t n_cols,
@@ -37,7 +38,7 @@ void eigDC_legacy(raft::device_resources const& handle,
                   math_t* eig_vals,
                   cudaStream_t stream)
 {
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   int lwork;
   RAFT_CUSOLVER_TRY(cusolverDnsyevd_bufferSize(cusolverH,
@@ -76,7 +77,7 @@ void eigDC_legacy(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void eigDC(raft::device_resources const& handle,
+void eigDC(raft::resources const& handle,
            const math_t* in,
            std::size_t n_rows,
            std::size_t n_cols,
@@ -87,7 +88,7 @@ void eigDC(raft::device_resources const& handle,
 #if CUDART_VERSION < 11010
   eigDC_legacy(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream);
 #else
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   cusolverDnParams_t dn_params = nullptr;
   RAFT_CUSOLVER_TRY(cusolverDnCreateParams(&dn_params));
@@ -141,7 +142,7 @@ void eigDC(raft::device_resources const& handle,
 enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT };
 
 template <typename math_t>
-void eigSelDC(raft::device_resources const& handle,
+void eigSelDC(raft::resources const& handle,
               math_t* in,
               std::size_t n_rows,
               std::size_t n_cols,
@@ -151,7 +152,7 @@ void eigSelDC(raft::device_resources const& handle,
               EigVecMemUsage memUsage,
               cudaStream_t stream)
 {
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   int lwork;
   int h_meig;
@@ -240,7 +241,7 @@ void eigSelDC(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void eigJacobi(raft::device_resources const& handle,
+void eigJacobi(raft::resources const& handle,
                const math_t* in,
                std::size_t n_rows,
                std::size_t n_cols,
@@ -250,7 +251,7 @@ void eigJacobi(raft::device_resources const& handle,
                math_t tol = 1.e-7,
                int sweeps = 15)
 {
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   syevjInfo_t syevj_params = nullptr;
   RAFT_CUSOLVER_TRY(cusolverDnCreateSyevjInfo(&syevj_params));
diff --git a/cpp/include/raft/linalg/detail/gemv.hpp b/cpp/include/raft/linalg/detail/gemv.hpp
index b3e001a851..c75bb87515 100644
--- a/cpp/include/raft/linalg/detail/gemv.hpp
+++ b/cpp/include/raft/linalg/detail/gemv.hpp
@@ -17,17 +17,18 @@
 #pragma once
 
 #include <cublas_v2.h>
+#include <raft/core/resource/cublas_handle.hpp>
 
 #include "cublas_wrappers.hpp"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace linalg {
 namespace detail {
 
 template <typename math_t, bool DevicePointerMode = false>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const bool trans_a,
           const int m,
           const int n,
@@ -41,7 +42,7 @@ void gemv(raft::device_resources const& handle,
           const int incy,
           cudaStream_t stream)
 {
-  cublasHandle_t cublas_h = handle.get_cublas_handle();
+  cublasHandle_t cublas_h = resource::get_cublas_handle(handle);
   detail::cublas_device_pointer_mode<DevicePointerMode> pmode(cublas_h);
   RAFT_CUBLAS_TRY(detail::cublasgemv(cublas_h,
                                      trans_a ? CUBLAS_OP_T : CUBLAS_OP_N,
@@ -59,7 +60,7 @@ void gemv(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows,
           const int n_cols,
@@ -76,7 +77,7 @@ void gemv(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
@@ -91,7 +92,7 @@ void gemv(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
@@ -107,7 +108,7 @@ void gemv(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
@@ -119,14 +120,14 @@ void gemv(raft::device_resources const& handle,
           const math_t beta,
           cudaStream_t stream)
 {
-  cublasHandle_t cublas_h = handle.get_cublas_handle();
+  cublasHandle_t cublas_h = resource::get_cublas_handle(handle);
   cublasOperation_t op_a  = trans_a ? CUBLAS_OP_T : CUBLAS_OP_N;
   RAFT_CUBLAS_TRY(
     cublasgemv(cublas_h, op_a, n_rows_a, n_cols_a, &alpha, A, lda, x, 1, &beta, y, 1, stream));
 }
 
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
diff --git a/cpp/include/raft/linalg/detail/lanczos.cuh b/cpp/include/raft/linalg/detail/lanczos.cuh
index 73d93ab535..3ab020bfd4 100644
--- a/cpp/include/raft/linalg/detail/lanczos.cuh
+++ b/cpp/include/raft/linalg/detail/lanczos.cuh
@@ -20,13 +20,15 @@
 #define _USE_MATH_DEFINES
 
 #include <cmath>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <vector>
 
 #include <cuda.h>
 #include <curand.h>
 
 #include "cublas_wrappers.hpp"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/spectral/detail/lapack.hpp>
 #include <raft/spectral/detail/warn_dbg.hpp>
 #include <raft/spectral/matrix_wrappers.hpp>
@@ -82,7 +84,7 @@ inline curandStatus_t curandGenerateNormalX(
  *  @return Zero if successful. Otherwise non-zero.
  */
 template <typename index_type_t, typename value_type_t>
-int performLanczosIteration(raft::device_resources const& handle,
+int performLanczosIteration(raft::resources const& handle,
                             spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
                             index_type_t* iter,
                             index_type_t maxIter,
@@ -104,8 +106,8 @@ int performLanczosIteration(raft::device_resources const& handle,
   constexpr value_type_t zero   = 0;
   value_type_t alpha;
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   RAFT_EXPECTS(A != nullptr, "Null matrix pointer.");
 
@@ -269,7 +271,7 @@ int performLanczosIteration(raft::device_resources const& handle,
     RAFT_CUBLAS_TRY(cublasscal(cublas_h, n, &alpha, lanczosVecs_dev + IDX(0, *iter, n), 1, stream));
   }
 
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   return 0;
 }
@@ -540,7 +542,7 @@ static int francisQRIteration(index_type_t n,
  *  @return error flag.
  */
 template <typename index_type_t, typename value_type_t>
-static int lanczosRestart(raft::device_resources const& handle,
+static int lanczosRestart(raft::resources const& handle,
                           index_type_t n,
                           index_type_t iter,
                           index_type_t iter_new,
@@ -562,8 +564,8 @@ static int lanczosRestart(raft::device_resources const& handle,
   constexpr value_type_t zero = 0;
   constexpr value_type_t one  = 1;
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   // Loop index
   index_type_t i;
@@ -743,7 +745,7 @@ static int lanczosRestart(raft::device_resources const& handle,
  */
 template <typename index_type_t, typename value_type_t>
 int computeSmallestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
   index_type_t nEigVecs,
   index_type_t maxIter,
@@ -794,8 +796,8 @@ int computeSmallestEigenvectors(
   RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
   RAFT_EXPECTS(restartIter >= nEigVecs, "Invalid restartIter.");
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   // -------------------------------------------------------
   // Variable initialization
@@ -984,7 +986,7 @@ int computeSmallestEigenvectors(
 
 template <typename index_type_t, typename value_type_t>
 int computeSmallestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
   index_type_t nEigVecs,
   index_type_t maxIter,
@@ -1087,7 +1089,7 @@ int computeSmallestEigenvectors(
  */
 template <typename index_type_t, typename value_type_t>
 int computeLargestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
   index_type_t nEigVecs,
   index_type_t maxIter,
@@ -1138,8 +1140,8 @@ int computeLargestEigenvectors(
   RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
   RAFT_EXPECTS(restartIter >= nEigVecs, "Invalid restartIter.");
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   // -------------------------------------------------------
   // Variable initialization
@@ -1331,7 +1333,7 @@ int computeLargestEigenvectors(
 
 template <typename index_type_t, typename value_type_t>
 int computeLargestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
   index_type_t nEigVecs,
   index_type_t maxIter,
diff --git a/cpp/include/raft/linalg/detail/lstsq.cuh b/cpp/include/raft/linalg/detail/lstsq.cuh
index fd6b00f9fd..128757d1d8 100644
--- a/cpp/include/raft/linalg/detail/lstsq.cuh
+++ b/cpp/include/raft/linalg/detail/lstsq.cuh
@@ -18,6 +18,9 @@
 
 #include <common/nvtx.hpp>
 #include <raft/common/nvtx.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+#include <raft/core/resource/cusolver_dn_handle.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/linalg/detail/cusolver_wrappers.hpp>
 #include <raft/linalg/eig.cuh>
@@ -116,7 +119,7 @@ struct DivideByNonZero {
  *             so it's not guaranteed to stay unmodified.
  */
 template <typename math_t>
-void lstsqSvdQR(raft::device_resources const& handle,
+void lstsqSvdQR(raft::resources const& handle,
                 math_t* A,
                 const int n_rows,
                 const int n_cols,
@@ -125,7 +128,7 @@ void lstsqSvdQR(raft::device_resources const& handle,
                 cudaStream_t stream)
 {
   const int minmn              = min(n_rows, n_cols);
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
   int cusolverWorkSetSize      = 0;
   // #TODO: Call from public API when ready
   RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDngesvd_bufferSize<math_t>(
@@ -176,7 +179,7 @@ void lstsqSvdQR(raft::device_resources const& handle,
  *             so it's not guaranteed to stay unmodified.
  */
 template <typename math_t>
-void lstsqSvdJacobi(raft::device_resources const& handle,
+void lstsqSvdJacobi(raft::resources const& handle,
                     math_t* A,
                     const int n_rows,
                     const int n_cols,
@@ -188,7 +191,7 @@ void lstsqSvdJacobi(raft::device_resources const& handle,
   gesvdjInfo_t gesvdj_params;
   RAFT_CUSOLVER_TRY(cusolverDnCreateGesvdjInfo(&gesvdj_params));
   int cusolverWorkSetSize      = 0;
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
   // #TODO: Call from public API when ready
   RAFT_CUSOLVER_TRY(
     raft::linalg::detail::cusolverDngesvdj_bufferSize<math_t>(cusolverH,
@@ -247,7 +250,7 @@ void lstsqSvdJacobi(raft::device_resources const& handle,
  *  (`w = (A^T A)^-1  A^T b`)
  */
 template <typename math_t>
-void lstsqEig(raft::device_resources const& handle,
+void lstsqEig(raft::resources const& handle,
               const math_t* A,
               const int n_rows,
               const int n_cols,
@@ -256,15 +259,15 @@ void lstsqEig(raft::device_resources const& handle,
               cudaStream_t stream)
 {
   rmm::cuda_stream_view mainStream   = rmm::cuda_stream_view(stream);
-  rmm::cuda_stream_view multAbStream = handle.get_next_usable_stream();
+  rmm::cuda_stream_view multAbStream = resource::get_next_usable_stream(handle);
   bool concurrent;
   // Check if the two streams can run concurrently. This is needed because a legacy default stream
   // would synchronize with other blocking streams. To avoid synchronization in such case, we try to
   // use an additional stream from the pool.
   if (!are_implicitly_synchronized(mainStream, multAbStream)) {
     concurrent = true;
-  } else if (handle.get_stream_pool_size() > 1) {
-    mainStream = handle.get_next_usable_stream();
+  } else if (resource::get_stream_pool_size(handle) > 1) {
+    mainStream = resource::get_next_usable_stream(handle);
     concurrent = true;
   } else {
     multAbStream = mainStream;
@@ -351,7 +354,7 @@ void lstsqEig(raft::device_resources const& handle,
  *            Warning: the content of this vector is modified by the cuSOLVER routines.
  */
 template <typename math_t>
-void lstsqQR(raft::device_resources const& handle,
+void lstsqQR(raft::resources const& handle,
              math_t* A,
              const int n_rows,
              const int n_cols,
@@ -359,8 +362,8 @@ void lstsqQR(raft::device_resources const& handle,
              math_t* w,
              cudaStream_t stream)
 {
-  cublasHandle_t cublasH       = handle.get_cublas_handle();
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cublasHandle_t cublasH       = resource::get_cublas_handle(handle);
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   int m = n_rows;
   int n = n_cols;
diff --git a/cpp/include/raft/linalg/detail/map.cuh b/cpp/include/raft/linalg/detail/map.cuh
index c4959e6812..40739ab54b 100644
--- a/cpp/include/raft/linalg/detail/map.cuh
+++ b/cpp/include/raft/linalg/detail/map.cuh
@@ -17,7 +17,9 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/device_resources.hpp>  // TODO: remove this
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/input_validation.hpp>
 #include <raft/util/integer_utils.hpp>
@@ -196,7 +198,7 @@ void map_check_shape(OutType out, InType in)
  * @tparam Func the device-lambda performing the actual operation
  * @tparam InTypes data-types of the inputs (device_mdspan)
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[out] out the output of the map operation (device_mdspan)
  * @param[in] f device lambda of type
  *                 ([auto offset], InTypes::value_type xs...) -> OutType::value_type
@@ -208,7 +210,7 @@ template <bool PassOffset,
           typename... InTypes,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InTypes...>>
-void map(const raft::device_resources& res, OutType out, Func f, InTypes... ins)
+void map(const raft::resources& res, OutType out, Func f, InTypes... ins)
 {
   RAFT_EXPECTS(raft::is_row_or_column_major(out), "Output must be contiguous");
   (map_check_shape(out, ins), ...);
@@ -218,15 +220,21 @@ void map(const raft::device_resources& res, OutType out, Func f, InTypes... ins)
         typename OutType::value_type,
         std::uint32_t,
         Func,
-        typename InTypes::value_type...>(
-      res.get_stream(), out.data_handle(), uint32_t(out.size()), f, ins.data_handle()...);
+        typename InTypes::value_type...>(resource::get_cuda_stream(res),
+                                         out.data_handle(),
+                                         uint32_t(out.size()),
+                                         f,
+                                         ins.data_handle()...);
   } else {
     map<PassOffset,
         typename OutType::value_type,
         std::uint64_t,
         Func,
-        typename InTypes::value_type...>(
-      res.get_stream(), out.data_handle(), uint64_t(out.size()), f, ins.data_handle()...);
+        typename InTypes::value_type...>(resource::get_cuda_stream(res),
+                                         out.data_handle(),
+                                         uint64_t(out.size()),
+                                         f,
+                                         ins.data_handle()...);
   }
 }
 
diff --git a/cpp/include/raft/linalg/detail/map_then_reduce.cuh b/cpp/include/raft/linalg/detail/map_then_reduce.cuh
index c22ef09809..6fae16117f 100644
--- a/cpp/include/raft/linalg/detail/map_then_reduce.cuh
+++ b/cpp/include/raft/linalg/detail/map_then_reduce.cuh
@@ -17,7 +17,7 @@
 #pragma once
 
 #include <cub/cub.cuh>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/vectorized.cuh>
 
diff --git a/cpp/include/raft/linalg/detail/matrix_vector_op.cuh b/cpp/include/raft/linalg/detail/matrix_vector_op.cuh
index 0c1261261c..61a0e84c11 100644
--- a/cpp/include/raft/linalg/detail/matrix_vector_op.cuh
+++ b/cpp/include/raft/linalg/detail/matrix_vector_op.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/linewise_op.cuh>
 
 namespace raft {
@@ -33,8 +34,8 @@ void matrixVectorOp(MatT* out,
                     Lambda op,
                     cudaStream_t stream)
 {
-  raft::device_resources handle(stream);
-
+  raft::resources handle;
+  resource::set_cuda_stream(handle, stream);
   bool along_lines = rowMajor == bcastAlongRows;
   if (rowMajor) {
     matrix::linewise_op<MatT, IdxType, row_major, Lambda>(
@@ -72,7 +73,8 @@ void matrixVectorOp(MatT* out,
                     Lambda op,
                     cudaStream_t stream)
 {
-  raft::device_resources handle(stream);
+  raft::resources handle;
+  resource::set_cuda_stream(handle, stream);
   bool along_lines = rowMajor == bcastAlongRows;
   if (rowMajor) {
     matrix::linewise_op<MatT, IdxType, row_major, Lambda>(
diff --git a/cpp/include/raft/linalg/detail/rsvd.cuh b/cpp/include/raft/linalg/detail/rsvd.cuh
index 48b9e1d2db..50cb339ea1 100644
--- a/cpp/include/raft/linalg/detail/rsvd.cuh
+++ b/cpp/include/raft/linalg/detail/rsvd.cuh
@@ -16,6 +16,8 @@
 
 #pragma once
 
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cusolver_dn_handle.hpp>
 #include <raft/linalg/eig.cuh>
 #include <raft/linalg/gemm.cuh>
 #include <raft/linalg/qr.cuh>
@@ -57,7 +59,7 @@ namespace detail {
  * @param stream cuda stream
  */
 template <typename math_t>
-void rsvdFixedRank(raft::device_resources const& handle,
+void rsvdFixedRank(raft::resources const& handle,
                    math_t* M,
                    int n_rows,
                    int n_cols,
@@ -74,8 +76,8 @@ void rsvdFixedRank(raft::device_resources const& handle,
                    int max_sweeps,
                    cudaStream_t stream)
 {
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
-  cublasHandle_t cublasH       = handle.get_cublas_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
+  cublasHandle_t cublasH       = resource::get_cublas_handle(handle);
 
   // All the notations are following Algorithm 4 & 5 in S. Voronin's paper:
   // https://arxiv.org/abs/1502.05366
@@ -377,7 +379,7 @@ void rsvdFixedRank(raft::device_resources const& handle,
  * @param stream cuda stream
  */
 template <typename math_t>
-void rsvdPerc(raft::device_resources const& handle,
+void rsvdPerc(raft::resources const& handle,
               math_t* M,
               int n_rows,
               int n_cols,
diff --git a/cpp/include/raft/linalg/detail/svd.cuh b/cpp/include/raft/linalg/detail/svd.cuh
index 94cd9e2789..5a4851bf6e 100644
--- a/cpp/include/raft/linalg/detail/svd.cuh
+++ b/cpp/include/raft/linalg/detail/svd.cuh
@@ -18,12 +18,15 @@
 
 #include "cublas_wrappers.hpp"
 #include "cusolver_wrappers.hpp"
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusolver_dn_handle.hpp>
 #include <raft/linalg/eig.cuh>
 #include <raft/linalg/gemm.cuh>
 #include <raft/linalg/transpose.cuh>
 
 #include <raft/common/nvtx.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/diagonal.cuh>
 #include <raft/matrix/math.cuh>
 #include <raft/matrix/norm.cuh>
@@ -38,7 +41,7 @@ namespace linalg {
 namespace detail {
 
 template <typename T>
-void svdQR(raft::device_resources const& handle,
+void svdQR(raft::resources const& handle,
            T* in,
            int n_rows,
            int n_cols,
@@ -52,8 +55,8 @@ void svdQR(raft::device_resources const& handle,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "raft::linalg::svdQR(%d, %d)", n_rows, n_cols);
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
-  cublasHandle_t cublasH       = handle.get_cublas_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
+  cublasHandle_t cublasH       = resource::get_cublas_handle(handle);
 
   const int m = n_rows;
   const int n = n_cols;
@@ -98,14 +101,14 @@ void svdQR(raft::device_resources const& handle,
 
   int dev_info;
   raft::update_host(&dev_info, devInfo.data(), 1, stream);
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   ASSERT(dev_info == 0,
          "svd.cuh: svd couldn't converge to a solution. "
          "This usually occurs when some of the features do not vary enough.");
 }
 
 template <typename math_t, typename idx_t>
-void svdEig(raft::device_resources const& handle,
+void svdEig(raft::resources const& handle,
             math_t* in,
             idx_t n_rows,
             idx_t n_cols,
@@ -117,8 +120,8 @@ void svdEig(raft::device_resources const& handle,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "raft::linalg::svdEig(%d, %d)", n_rows, n_cols);
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
-  cublasHandle_t cublasH       = handle.get_cublas_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
+  cublasHandle_t cublasH       = resource::get_cublas_handle(handle);
 
   auto len = n_cols * n_cols;
   rmm::device_uvector<math_t> in_cross_mult(len, stream);
@@ -167,7 +170,7 @@ void svdEig(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void svdJacobi(raft::device_resources const& handle,
+void svdJacobi(raft::resources const& handle,
                math_t* in,
                int n_rows,
                int n_cols,
@@ -182,7 +185,7 @@ void svdJacobi(raft::device_resources const& handle,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols);
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   gesvdjInfo_t gesvdj_params = NULL;
 
@@ -237,7 +240,7 @@ void svdJacobi(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void svdReconstruction(raft::device_resources const& handle,
+void svdReconstruction(raft::resources const& handle,
                        math_t* U,
                        math_t* S,
                        math_t* V,
@@ -268,7 +271,7 @@ void svdReconstruction(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-bool evaluateSVDByL2Norm(raft::device_resources const& handle,
+bool evaluateSVDByL2Norm(raft::resources const& handle,
                          math_t* A_d,
                          math_t* U,
                          math_t* S_vec,
@@ -279,7 +282,7 @@ bool evaluateSVDByL2Norm(raft::device_resources const& handle,
                          math_t tol,
                          cudaStream_t stream)
 {
-  cublasHandle_t cublasH = handle.get_cublas_handle();
+  cublasHandle_t cublasH = resource::get_cublas_handle(handle);
 
   int m = n_rows, n = n_cols;
 
diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh
index 428b9ba618..d617b065da 100644
--- a/cpp/include/raft/linalg/divide.cuh
+++ b/cpp/include/raft/linalg/divide.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/divide.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/util/cuda_utils.cuh>
@@ -56,7 +57,7 @@ void divideScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
  * @tparam ScalarIdxType Index Type of scalar
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in    Input
  * @param[in] scalar    raft::host_scalar_view
  * @param[out] out    Output
@@ -66,7 +67,7 @@ template <typename InType,
           typename ScalarIdxType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void divide_scalar(raft::device_resources const& handle,
+void divide_scalar(raft::resources const& handle,
                    InType in,
                    OutType out,
                    raft::host_scalar_view<const typename InType::value_type, ScalarIdxType> scalar)
@@ -83,13 +84,13 @@ void divide_scalar(raft::device_resources const& handle,
                                                          in.data_handle(),
                                                          *scalar.data_handle(),
                                                          static_cast<std::uint32_t>(out.size()),
-                                                         handle.get_stream());
+                                                         resource::get_cuda_stream(handle));
   } else {
     divideScalar<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                          in.data_handle(),
                                                          *scalar.data_handle(),
                                                          static_cast<std::uint64_t>(out.size()),
-                                                         handle.get_stream());
+                                                         resource::get_cuda_stream(handle));
   }
 }
 
diff --git a/cpp/include/raft/linalg/dot.cuh b/cpp/include/raft/linalg/dot.cuh
index 917188d695..9db9074c35 100644
--- a/cpp/include/raft/linalg/dot.cuh
+++ b/cpp/include/raft/linalg/dot.cuh
@@ -18,11 +18,13 @@
 
 #pragma once
 
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::linalg {
 
@@ -33,7 +35,7 @@ namespace raft::linalg {
 
 /**
  * @brief Computes the dot product of two vectors.
- * @param[in] handle   raft::device_resources
+ * @param[in] handle   raft::resources
  * @param[in] x        First input vector
  * @param[in] y        Second input vector
  * @param[out] out     The output dot product between the x and y vectors.
@@ -43,7 +45,7 @@ template <typename ElementType,
           typename ScalarIndexType,
           typename LayoutPolicy1,
           typename LayoutPolicy2>
-void dot(raft::device_resources const& handle,
+void dot(raft::resources const& handle,
          raft::device_vector_view<const ElementType, IndexType, LayoutPolicy1> x,
          raft::device_vector_view<const ElementType, IndexType, LayoutPolicy2> y,
          raft::device_scalar_view<ElementType, ScalarIndexType> out)
@@ -51,19 +53,19 @@ void dot(raft::device_resources const& handle,
   RAFT_EXPECTS(x.size() == y.size(),
                "Size mismatch between x and y input vectors in raft::linalg::dot");
 
-  RAFT_CUBLAS_TRY(detail::cublasdot(handle.get_cublas_handle(),
+  RAFT_CUBLAS_TRY(detail::cublasdot(resource::get_cublas_handle(handle),
                                     x.size(),
                                     x.data_handle(),
                                     x.stride(0),
                                     y.data_handle(),
                                     y.stride(0),
                                     out.data_handle(),
-                                    handle.get_stream()));
+                                    resource::get_cuda_stream(handle)));
 }
 
 /**
  * @brief Computes the dot product of two vectors.
- * @param[in] handle   raft::device_resources
+ * @param[in] handle   raft::resources
  * @param[in] x        First input vector
  * @param[in] y        Second input vector
  * @param[out] out     The output dot product between the x and y vectors.
@@ -73,7 +75,7 @@ template <typename ElementType,
           typename ScalarIndexType,
           typename LayoutPolicy1,
           typename LayoutPolicy2>
-void dot(raft::device_resources const& handle,
+void dot(raft::resources const& handle,
          raft::device_vector_view<const ElementType, IndexType, LayoutPolicy1> x,
          raft::device_vector_view<const ElementType, IndexType, LayoutPolicy2> y,
          raft::host_scalar_view<ElementType, ScalarIndexType> out)
@@ -81,14 +83,14 @@ void dot(raft::device_resources const& handle,
   RAFT_EXPECTS(x.size() == y.size(),
                "Size mismatch between x and y input vectors in raft::linalg::dot");
 
-  RAFT_CUBLAS_TRY(detail::cublasdot(handle.get_cublas_handle(),
+  RAFT_CUBLAS_TRY(detail::cublasdot(resource::get_cublas_handle(handle),
                                     x.size(),
                                     x.data_handle(),
                                     x.stride(0),
                                     y.data_handle(),
                                     y.stride(0),
                                     out.data_handle(),
-                                    handle.get_stream()));
+                                    resource::get_cuda_stream(handle)));
 }
 
 /** @} */  // end of group dot
diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh
index 7829f8e49f..954bf19334 100644
--- a/cpp/include/raft/linalg/eig.cuh
+++ b/cpp/include/raft/linalg/eig.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/eig.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 
@@ -38,7 +39,7 @@ namespace linalg {
  * @param stream cuda stream
  */
 template <typename math_t>
-void eigDC(raft::device_resources const& handle,
+void eigDC(raft::resources const& handle,
            const math_t* in,
            std::size_t n_rows,
            std::size_t n_cols,
@@ -68,7 +69,7 @@ using detail::OVERWRITE_INPUT;
  * @param stream cuda stream
  */
 template <typename math_t>
-void eigSelDC(raft::device_resources const& handle,
+void eigSelDC(raft::resources const& handle,
               math_t* in,
               std::size_t n_rows,
               std::size_t n_cols,
@@ -97,7 +98,7 @@ void eigSelDC(raft::device_resources const& handle,
  * accuracy.
  */
 template <typename math_t>
-void eigJacobi(raft::device_resources const& handle,
+void eigJacobi(raft::resources const& handle,
                const math_t* in,
                std::size_t n_rows,
                std::size_t n_cols,
@@ -120,14 +121,14 @@ void eigJacobi(raft::device_resources const& handle,
  * symmetric matrices
  * @tparam ValueType the data-type of input and output
  * @tparam IntegerType Integer used for addressing
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param[in] in input raft::device_matrix_view (symmetric matrix that has real eig values and
  * vectors)
  * @param[out] eig_vectors: eigenvectors output of type raft::device_matrix_view
  * @param[out] eig_vals: eigen values output of type raft::device_vector_view
  */
 template <typename ValueType, typename IndexType>
-void eig_dc(raft::device_resources const& handle,
+void eig_dc(raft::resources const& handle,
             raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
             raft::device_matrix_view<ValueType, IndexType, raft::col_major> eig_vectors,
             raft::device_vector_view<ValueType, IndexType> eig_vals)
@@ -141,7 +142,7 @@ void eig_dc(raft::device_resources const& handle,
         in.extent(1),
         eig_vectors.data_handle(),
         eig_vals.data_handle(),
-        handle.get_stream());
+        resource::get_cuda_stream(handle));
 }
 
 /**
@@ -149,7 +150,7 @@ void eig_dc(raft::device_resources const& handle,
  *        for the column-major symmetric matrices
  * @tparam ValueType the data-type of input and output
  * @tparam IntegerType Integer used for addressing
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in input raft::device_matrix_view (symmetric matrix that has real eig values and
  * vectors)
  * @param[out] eig_vectors: eigenvectors output of type raft::device_matrix_view
@@ -158,7 +159,7 @@ void eig_dc(raft::device_resources const& handle,
  * @param[in] memUsage: the memory selection for eig vector output
  */
 template <typename ValueType, typename IndexType>
-void eig_dc_selective(raft::device_resources const& handle,
+void eig_dc_selective(raft::resources const& handle,
                       raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
                       raft::device_matrix_view<ValueType, IndexType, raft::col_major> eig_vectors,
                       raft::device_vector_view<ValueType, IndexType> eig_vals,
@@ -177,7 +178,7 @@ void eig_dc_selective(raft::device_resources const& handle,
                          eig_vectors.data_handle(),
                          eig_vals.data_handle(),
                          memUsage,
-                         handle.get_stream());
+                         resource::get_cuda_stream(handle));
 }
 
 /**
@@ -185,7 +186,7 @@ void eig_dc_selective(raft::device_resources const& handle,
  * column-major symmetric matrices (in parameter)
  * @tparam ValueType the data-type of input and output
  * @tparam IntegerType Integer used for addressing
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param[in] in input raft::device_matrix_view (symmetric matrix that has real eig values and
  * vectors)
  * @param[out] eig_vectors: eigenvectors output of type raft::device_matrix_view
@@ -196,7 +197,7 @@ void eig_dc_selective(raft::device_resources const& handle,
  * accuracy.
  */
 template <typename ValueType, typename IndexType>
-void eig_jacobi(raft::device_resources const& handle,
+void eig_jacobi(raft::resources const& handle,
                 raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
                 raft::device_matrix_view<ValueType, IndexType, raft::col_major> eig_vectors,
                 raft::device_vector_view<ValueType, IndexType> eig_vals,
@@ -212,7 +213,7 @@ void eig_jacobi(raft::device_resources const& handle,
             in.extent(1),
             eig_vectors.data_handle(),
             eig_vals.data_handle(),
-            handle.get_stream(),
+            resource::get_cuda_stream(handle),
             tol,
             sweeps);
 }
diff --git a/cpp/include/raft/linalg/gemm.cuh b/cpp/include/raft/linalg/gemm.cuh
index 7dfaa18911..aea9d52673 100644
--- a/cpp/include/raft/linalg/gemm.cuh
+++ b/cpp/include/raft/linalg/gemm.cuh
@@ -21,9 +21,10 @@
 #include "detail/gemm.hpp"
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/input_validation.hpp>
 
 namespace raft {
@@ -213,7 +214,7 @@ template <typename ValueType,
           typename                = std::enable_if_t<std::disjunction_v<
             std::is_same<ScalarViewType, raft::host_scalar_view<ValueType, ScalarIdxType>>,
             std::is_same<ScalarViewType, raft::device_scalar_view<ValueType, ScalarIdxType>>>>>
-void gemm(raft::device_resources const& handle,
+void gemm(raft::resources const& handle,
           raft::device_matrix_view<ValueType, IndexType, LayoutPolicyX> x,
           raft::device_matrix_view<ValueType, IndexType, LayoutPolicyY> y,
           raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ> z,
@@ -265,7 +266,7 @@ void gemm(raft::device_resources const& handle,
                                        is_z_col_major,
                                        is_x_col_major,
                                        is_y_col_major,
-                                       handle.get_stream(),
+                                       resource::get_cuda_stream(handle),
                                        alpha.value().data_handle(),
                                        beta.value().data_handle());
 }
diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh
index 019ec9f7ac..640964d018 100644
--- a/cpp/include/raft/linalg/gemv.cuh
+++ b/cpp/include/raft/linalg/gemv.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/gemv.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
@@ -50,7 +51,7 @@ namespace linalg {
  * @param [in] stream
  */
 template <typename math_t, bool DevicePointerMode = false>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const bool trans_a,
           const int m,
           const int n,
@@ -69,7 +70,7 @@ void gemv(raft::device_resources const& handle,
 }
 
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows,
           const int n_cols,
@@ -103,7 +104,7 @@ void gemv(raft::device_resources const& handle,
  * @param stream stream on which this function is run
  */
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
@@ -133,7 +134,7 @@ void gemv(raft::device_resources const& handle,
  * @param stream stream on which this function is run
  */
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
@@ -165,7 +166,7 @@ void gemv(raft::device_resources const& handle,
  * @param stream stream on which this function is run
  */
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
@@ -199,7 +200,7 @@ void gemv(raft::device_resources const& handle,
  *
  */
 template <typename math_t>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           const math_t* A,
           const int n_rows_a,
           const int n_cols_a,
@@ -246,7 +247,7 @@ template <typename ValueType,
           typename                = std::enable_if_t<std::disjunction_v<
             std::is_same<ScalarViewType, raft::host_scalar_view<ValueType, ScalarIdxType>>,
             std::is_same<ScalarViewType, raft::device_scalar_view<ValueType, ScalarIdxType>>>>>
-void gemv(raft::device_resources const& handle,
+void gemv(raft::resources const& handle,
           raft::device_matrix_view<const ValueType, IndexType, LayoutPolicy> A,
           raft::device_vector_view<const ValueType, IndexType> x,
           raft::device_vector_view<ValueType, IndexType> y,
@@ -300,7 +301,7 @@ void gemv(raft::device_resources const& handle,
                                beta.value().data_handle(),
                                y.data_handle(),
                                1,
-                               handle.get_stream());
+                               resource::get_cuda_stream(handle));
 }
 /** @} */  // end of gemv
 
diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh
index c753215737..20588cbe17 100644
--- a/cpp/include/raft/linalg/lstsq.cuh
+++ b/cpp/include/raft/linalg/lstsq.cuh
@@ -18,7 +18,8 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/lstsq.cuh>
 namespace raft {
 namespace linalg {
@@ -37,7 +38,7 @@ namespace linalg {
  * @param[in] stream cuda stream for ordering operations
  */
 template <typename math_t>
-void lstsqSvdQR(raft::device_resources const& handle,
+void lstsqSvdQR(raft::resources const& handle,
                 math_t* A,
                 const int n_rows,
                 const int n_cols,
@@ -62,7 +63,7 @@ void lstsqSvdQR(raft::device_resources const& handle,
  * @param[in] stream cuda stream for ordering operations
  */
 template <typename math_t>
-void lstsqSvdJacobi(raft::device_resources const& handle,
+void lstsqSvdJacobi(raft::resources const& handle,
                     math_t* A,
                     const int n_rows,
                     const int n_cols,
@@ -78,7 +79,7 @@ void lstsqSvdJacobi(raft::device_resources const& handle,
  *  (`w = (A^T A)^-1  A^T b`)
  */
 template <typename math_t>
-void lstsqEig(raft::device_resources const& handle,
+void lstsqEig(raft::resources const& handle,
               const math_t* A,
               const int n_rows,
               const int n_cols,
@@ -104,7 +105,7 @@ void lstsqEig(raft::device_resources const& handle,
  * @param[in] stream cuda stream for ordering operations
  */
 template <typename math_t>
-void lstsqQR(raft::device_resources const& handle,
+void lstsqQR(raft::resources const& handle,
              math_t* A,
              const int n_rows,
              const int n_cols,
@@ -125,7 +126,7 @@ void lstsqQR(raft::device_resources const& handle,
  * Via SVD decomposition of `A = U S Vt`.
  *
  * @tparam ValueType the data-type of input/output
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[inout] A input raft::device_matrix_view
  *            Warning: the content of this matrix is modified.
  * @param[inout] b input target raft::device_vector_view
@@ -133,7 +134,7 @@ void lstsqQR(raft::device_resources const& handle,
  * @param[out] w output coefficient raft::device_vector_view
  */
 template <typename ValueType, typename IndexType>
-void lstsq_svd_qr(raft::device_resources const& handle,
+void lstsq_svd_qr(raft::resources const& handle,
                   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> A,
                   raft::device_vector_view<const ValueType, IndexType> b,
                   raft::device_vector_view<ValueType, IndexType> w)
@@ -147,7 +148,7 @@ void lstsq_svd_qr(raft::device_resources const& handle,
              A.extent(1),
              const_cast<ValueType*>(b.data_handle()),
              w.data_handle(),
-             handle.get_stream());
+             resource::get_cuda_stream(handle));
 }
 
 /**
@@ -155,7 +156,7 @@ void lstsq_svd_qr(raft::device_resources const& handle,
  *  Via SVD decomposition of `A = U S V^T` using Jacobi iterations.
  *
  * @tparam ValueType the data-type of input/output
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[inout] A input raft::device_matrix_view
  *            Warning: the content of this matrix is modified.
  * @param[inout] b input target raft::device_vector_view
@@ -163,7 +164,7 @@ void lstsq_svd_qr(raft::device_resources const& handle,
  * @param[out] w output coefficient raft::device_vector_view
  */
 template <typename ValueType, typename IndexType>
-void lstsq_svd_jacobi(raft::device_resources const& handle,
+void lstsq_svd_jacobi(raft::resources const& handle,
                       raft::device_matrix_view<const ValueType, IndexType, raft::col_major> A,
                       raft::device_vector_view<const ValueType, IndexType> b,
                       raft::device_vector_view<ValueType, IndexType> w)
@@ -177,7 +178,7 @@ void lstsq_svd_jacobi(raft::device_resources const& handle,
                  A.extent(1),
                  const_cast<ValueType*>(b.data_handle()),
                  w.data_handle(),
-                 handle.get_stream());
+                 resource::get_cuda_stream(handle));
 }
 
 /**
@@ -186,7 +187,7 @@ void lstsq_svd_jacobi(raft::device_resources const& handle,
  *  (`w = (A^T A)^-1  A^T b`)
  *
  * @tparam ValueType the data-type of input/output
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[inout] A input raft::device_matrix_view
  *            Warning: the content of this matrix is modified by the cuSOLVER routines.
  * @param[inout] b input target raft::device_vector_view
@@ -194,7 +195,7 @@ void lstsq_svd_jacobi(raft::device_resources const& handle,
  * @param[out] w output coefficient raft::device_vector_view
  */
 template <typename ValueType, typename IndexType>
-void lstsq_eig(raft::device_resources const& handle,
+void lstsq_eig(raft::resources const& handle,
                raft::device_matrix_view<const ValueType, IndexType, raft::col_major> A,
                raft::device_vector_view<const ValueType, IndexType> b,
                raft::device_vector_view<ValueType, IndexType> w)
@@ -208,7 +209,7 @@ void lstsq_eig(raft::device_resources const& handle,
            A.extent(1),
            const_cast<ValueType*>(b.data_handle()),
            w.data_handle(),
-           handle.get_stream());
+           resource::get_cuda_stream(handle));
 }
 
 /**
@@ -217,7 +218,7 @@ void lstsq_eig(raft::device_resources const& handle,
  *  (triangular system of equations `Rw = Q^T b`)
  *
  * @tparam ValueType the data-type of input/output
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[inout] A input raft::device_matrix_view
  *            Warning: the content of this matrix is modified.
  * @param[inout] b input target raft::device_vector_view
@@ -225,7 +226,7 @@ void lstsq_eig(raft::device_resources const& handle,
  * @param[out] w output coefficient raft::device_vector_view
  */
 template <typename ValueType, typename IndexType>
-void lstsq_qr(raft::device_resources const& handle,
+void lstsq_qr(raft::resources const& handle,
               raft::device_matrix_view<const ValueType, IndexType, raft::col_major> A,
               raft::device_vector_view<const ValueType, IndexType> b,
               raft::device_vector_view<ValueType, IndexType> w)
@@ -239,7 +240,7 @@ void lstsq_qr(raft::device_resources const& handle,
           A.extent(1),
           const_cast<ValueType*>(b.data_handle()),
           w.data_handle(),
-          handle.get_stream());
+          resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of lstsq
diff --git a/cpp/include/raft/linalg/map.cuh b/cpp/include/raft/linalg/map.cuh
index 57b3a7cb6f..e4bfeac020 100644
--- a/cpp/include/raft/linalg/map.cuh
+++ b/cpp/include/raft/linalg/map.cuh
@@ -21,7 +21,7 @@
 #include "detail/map.cuh"
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::linalg {
 
@@ -76,7 +76,7 @@ template <typename InType,
  * Usage example:
  * @code{.cpp}
  *  #include <raft/core/device_mdarray.hpp>
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/core/operators.hpp>
  *  #include <raft/linalg/map.cuh>
  *
@@ -90,7 +90,7 @@ template <typename InType,
  * @tparam Func the device-lambda performing the actual operation
  * @tparam InTypes data-types of the inputs (device_mdspan)
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[out] out the output of the map operation (device_mdspan)
  * @param[in] f device lambda
  *                 (InTypes::value_type xs...) -> OutType::value_type
@@ -101,7 +101,7 @@ template <typename OutType,
           typename... InTypes,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InTypes...>>
-void map(const raft::device_resources& res, OutType out, Func f, InTypes... ins)
+void map(const raft::resources& res, OutType out, Func f, InTypes... ins)
 {
   return detail::map<false>(res, out, f, ins...);
 }
@@ -113,7 +113,7 @@ void map(const raft::device_resources& res, OutType out, Func f, InTypes... ins)
  * @tparam OutType data-type of the result (device_mdspan)
  * @tparam Func the device-lambda performing the actual operation
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[in] in1 the input (the same size as the output) (device_mdspan)
  * @param[out] out the output of the map operation (device_mdspan)
  * @param[in] f device lambda
@@ -124,7 +124,7 @@ template <typename InType1,
           typename Func,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InType1>>
-void map(const raft::device_resources& res, InType1 in1, OutType out, Func f)
+void map(const raft::resources& res, InType1 in1, OutType out, Func f)
 {
   return detail::map<false>(res, out, f, in1);
 }
@@ -137,7 +137,7 @@ void map(const raft::device_resources& res, InType1 in1, OutType out, Func f)
  * @tparam OutType data-type of the result (device_mdspan)
  * @tparam Func the device-lambda performing the actual operation
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[in] in1 the input (the same size as the output) (device_mdspan)
  * @param[in] in2 the input (the same size as the output) (device_mdspan)
  * @param[out] out the output of the map operation (device_mdspan)
@@ -150,7 +150,7 @@ template <typename InType1,
           typename Func,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InType1, InType2>>
-void map(const raft::device_resources& res, InType1 in1, InType2 in2, OutType out, Func f)
+void map(const raft::resources& res, InType1 in1, InType2 in2, OutType out, Func f)
 {
   return detail::map<false>(res, out, f, in1, in2);
 }
@@ -164,7 +164,7 @@ void map(const raft::device_resources& res, InType1 in1, InType2 in2, OutType ou
  * @tparam OutType data-type of the result (device_mdspan)
  * @tparam Func the device-lambda performing the actual operation
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[in] in1 the input 1 (the same size as the output) (device_mdspan)
  * @param[in] in2 the input 2 (the same size as the output) (device_mdspan)
  * @param[in] in3 the input 3 (the same size as the output) (device_mdspan)
@@ -179,8 +179,7 @@ template <typename InType1,
           typename Func,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InType1, InType2, InType3>>
-void map(
-  const raft::device_resources& res, InType1 in1, InType2 in2, InType3 in3, OutType out, Func f)
+void map(const raft::resources& res, InType1 in1, InType2 in2, InType3 in3, OutType out, Func f)
 {
   return detail::map<false>(res, out, f, in1, in2, in3);
 }
@@ -202,7 +201,7 @@ void map(
  * Usage example:
  * @code{.cpp}
  *  #include <raft/core/device_mdarray.hpp>
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/core/operators.hpp>
  *  #include <raft/linalg/map.cuh>
  *
@@ -214,7 +213,7 @@ void map(
  * @tparam Func the device-lambda performing the actual operation
  * @tparam InTypes data-types of the inputs (device_mdspan)
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[out] out the output of the map operation (device_mdspan)
  * @param[in] f device lambda
  *                 (auto offset, InTypes::value_type xs...) -> OutType::value_type
@@ -225,7 +224,7 @@ template <typename OutType,
           typename... InTypes,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InTypes...>>
-void map_offset(const raft::device_resources& res, OutType out, Func f, InTypes... ins)
+void map_offset(const raft::resources& res, OutType out, Func f, InTypes... ins)
 {
   return detail::map<true>(res, out, f, ins...);
 }
@@ -237,7 +236,7 @@ void map_offset(const raft::device_resources& res, OutType out, Func f, InTypes.
  * @tparam OutType data-type of the result (device_mdspan)
  * @tparam Func the device-lambda performing the actual operation
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[in] in1 the input (the same size as the output) (device_mdspan)
  * @param[out] out the output of the map operation (device_mdspan)
  * @param[in] f device lambda
@@ -248,7 +247,7 @@ template <typename InType1,
           typename Func,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InType1>>
-void map_offset(const raft::device_resources& res, InType1 in1, OutType out, Func f)
+void map_offset(const raft::resources& res, InType1 in1, OutType out, Func f)
 {
   return detail::map<true>(res, out, f, in1);
 }
@@ -261,7 +260,7 @@ void map_offset(const raft::device_resources& res, InType1 in1, OutType out, Fun
  * @tparam OutType data-type of the result (device_mdspan)
  * @tparam Func the device-lambda performing the actual operation
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[in] in1 the input (the same size as the output) (device_mdspan)
  * @param[in] in2 the input (the same size as the output) (device_mdspan)
  * @param[out] out the output of the map operation (device_mdspan)
@@ -274,7 +273,7 @@ template <typename InType1,
           typename Func,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InType1, InType2>>
-void map_offset(const raft::device_resources& res, InType1 in1, InType2 in2, OutType out, Func f)
+void map_offset(const raft::resources& res, InType1 in1, InType2 in2, OutType out, Func f)
 {
   return detail::map<true>(res, out, f, in1, in2);
 }
@@ -288,7 +287,7 @@ void map_offset(const raft::device_resources& res, InType1 in1, InType2 in2, Out
  * @tparam OutType data-type of the result (device_mdspan)
  * @tparam Func the device-lambda performing the actual operation
  *
- * @param[in] res raft::device_resources
+ * @param[in] res raft::resources
  * @param[in] in1 the input 1 (the same size as the output) (device_mdspan)
  * @param[in] in2 the input 2 (the same size as the output) (device_mdspan)
  * @param[in] in3 the input 3 (the same size as the output) (device_mdspan)
@@ -305,7 +304,7 @@ template <typename InType1,
           typename = raft::enable_if_output_device_mdspan<OutType>,
           typename = raft::enable_if_input_device_mdspan<InType1, InType2, InType3>>
 void map_offset(
-  const raft::device_resources& res, InType1 in1, InType2 in2, InType3 in3, OutType out, Func f)
+  const raft::resources& res, InType1 in1, InType2 in2, InType3 in3, OutType out, Func f)
 {
   return detail::map<true>(res, out, f, in1, in2, in3);
 }
diff --git a/cpp/include/raft/linalg/map_reduce.cuh b/cpp/include/raft/linalg/map_reduce.cuh
index b89f3bdd54..f17caa478b 100644
--- a/cpp/include/raft/linalg/map_reduce.cuh
+++ b/cpp/include/raft/linalg/map_reduce.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/map_then_reduce.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 
@@ -75,7 +76,7 @@ void mapReduce(OutType* out,
  * @tparam OutValueType the data-type of the output
  * @tparam ScalarIdxType index type of scalar
  * @tparam Args additional parameters
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in the input of type raft::device_vector_view
  * @param[in] neutral The neutral element of the reduction operation. For example:
  *    0 for sum, 1 for multiply, +Inf for Min, -Inf for Max
@@ -91,7 +92,7 @@ template <typename InValueType,
           typename OutValueType,
           typename ScalarIdxType,
           typename... Args>
-void map_reduce(raft::device_resources const& handle,
+void map_reduce(raft::resources const& handle,
                 raft::device_vector_view<const InValueType, IndexType> in,
                 raft::device_scalar_view<OutValueType, ScalarIdxType> out,
                 OutValueType neutral,
@@ -105,7 +106,7 @@ void map_reduce(raft::device_resources const& handle,
     neutral,
     map,
     op,
-    handle.get_stream(),
+    resource::get_cuda_stream(handle),
     in.data_handle(),
     args...);
 }
diff --git a/cpp/include/raft/linalg/matrix_vector.cuh b/cpp/include/raft/linalg/matrix_vector.cuh
index fa24ea28b7..85805c287a 100644
--- a/cpp/include/raft/linalg/matrix_vector.cuh
+++ b/cpp/include/raft/linalg/matrix_vector.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/linalg_types.hpp>
 #include <raft/matrix/detail/math.cuh>
 #include <raft/util/input_validation.hpp>
@@ -37,7 +38,7 @@ namespace raft::linalg {
  * the rows of the matrix or columns using enum class raft::linalg::Apply
  */
 template <typename math_t, typename idx_t, typename layout_t>
-void binary_mult_skip_zero(raft::device_resources const& handle,
+void binary_mult_skip_zero(raft::resources const& handle,
                            raft::device_matrix_view<math_t, idx_t, layout_t> data,
                            raft::device_vector_view<const math_t, idx_t> vec,
                            Apply apply)
@@ -58,7 +59,7 @@ void binary_mult_skip_zero(raft::device_resources const& handle,
                                                  data.extent(1),
                                                  row_major,
                                                  bcast_along_rows,
-                                                 handle.get_stream());
+                                                 resource::get_cuda_stream(handle));
 }
 
 /**
@@ -70,7 +71,7 @@ void binary_mult_skip_zero(raft::device_resources const& handle,
  * the rows of the matrix or columns using enum class raft::linalg::Apply
  */
 template <typename math_t, typename idx_t, typename layout_t>
-void binary_div(raft::device_resources const& handle,
+void binary_div(raft::resources const& handle,
                 raft::device_matrix_view<math_t, idx_t, layout_t> data,
                 raft::device_vector_view<const math_t, idx_t> vec,
                 Apply apply)
@@ -91,7 +92,7 @@ void binary_div(raft::device_resources const& handle,
                                         data.extent(1),
                                         row_major,
                                         bcast_along_rows,
-                                        handle.get_stream());
+                                        resource::get_cuda_stream(handle));
 }
 
 /**
@@ -105,7 +106,7 @@ void binary_div(raft::device_resources const& handle,
  * value if false
  */
 template <typename math_t, typename idx_t, typename layout_t>
-void binary_div_skip_zero(raft::device_resources const& handle,
+void binary_div_skip_zero(raft::resources const& handle,
                           raft::device_matrix_view<math_t, idx_t, layout_t> data,
                           raft::device_vector_view<const math_t, idx_t> vec,
                           Apply apply,
@@ -127,7 +128,7 @@ void binary_div_skip_zero(raft::device_resources const& handle,
                                                 data.extent(1),
                                                 row_major,
                                                 bcast_along_rows,
-                                                handle.get_stream(),
+                                                resource::get_cuda_stream(handle),
                                                 return_zero);
 }
 
@@ -140,7 +141,7 @@ void binary_div_skip_zero(raft::device_resources const& handle,
  * the rows of the matrix or columns using enum class raft::linalg::Apply
  */
 template <typename math_t, typename idx_t, typename layout_t>
-void binary_add(raft::device_resources const& handle,
+void binary_add(raft::resources const& handle,
                 raft::device_matrix_view<math_t, idx_t, layout_t> data,
                 raft::device_vector_view<const math_t, idx_t> vec,
                 Apply apply)
@@ -161,7 +162,7 @@ void binary_add(raft::device_resources const& handle,
                                         data.extent(1),
                                         row_major,
                                         bcast_along_rows,
-                                        handle.get_stream());
+                                        resource::get_cuda_stream(handle));
 }
 
 /**
@@ -173,7 +174,7 @@ void binary_add(raft::device_resources const& handle,
  * the rows of the matrix or columns using enum class raft::linalg::Apply
  */
 template <typename math_t, typename idx_t, typename layout_t>
-void binary_sub(raft::device_resources const& handle,
+void binary_sub(raft::resources const& handle,
                 raft::device_matrix_view<math_t, idx_t, layout_t> data,
                 raft::device_vector_view<const math_t, idx_t> vec,
                 Apply apply)
@@ -194,7 +195,7 @@ void binary_sub(raft::device_resources const& handle,
                                         data.extent(1),
                                         row_major,
                                         bcast_along_rows,
-                                        handle.get_stream());
+                                        resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of matrix_vector
diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh
index e8833a2779..e620d227eb 100644
--- a/cpp/include/raft/linalg/matrix_vector_op.cuh
+++ b/cpp/include/raft/linalg/matrix_vector_op.cuh
@@ -20,9 +20,10 @@
 
 #include "detail/matrix_vector_op.cuh"
 #include "linalg_types.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/input_validation.hpp>
 
 namespace raft {
@@ -123,7 +124,7 @@ void matrixVectorOp(MatT* out,
  * @tparam LayoutPolicy the layout of input and output (raft::row_major or raft::col_major)
  * @tparam Lambda a device function which represents a binary operator
  * @tparam IndexType Integer used for addressing
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] matrix input raft::matrix_view
  * @param[in] vec vector raft::vector_view
  * @param[out] out output raft::matrix_view
@@ -136,7 +137,7 @@ template <typename MatValueType,
           typename LayoutPolicy,
           typename Lambda,
           typename IndexType>
-void matrix_vector_op(raft::device_resources const& handle,
+void matrix_vector_op(raft::resources const& handle,
                       raft::device_matrix_view<const MatValueType, IndexType, LayoutPolicy> matrix,
                       raft::device_vector_view<const VecValueType, IndexType> vec,
                       raft::device_matrix_view<MatValueType, IndexType, LayoutPolicy> out,
@@ -166,7 +167,7 @@ void matrix_vector_op(raft::device_resources const& handle,
                  rowMajor,
                  bcastAlongRows,
                  op,
-                 handle.get_stream());
+                 resource::get_cuda_stream(handle));
 }
 
 /**
@@ -183,7 +184,7 @@ void matrix_vector_op(raft::device_resources const& handle,
  * @tparam LayoutPolicy the layout of input and output (raft::row_major or raft::col_major)
  * @tparam Lambda a device function which represents a binary operator
  * @tparam IndexType Integer used for addressing
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param matrix input raft::matrix_view
  * @param vec1 the first vector raft::vector_view
  * @param vec2 the second vector raft::vector_view
@@ -198,7 +199,7 @@ template <typename MatValueType,
           typename LayoutPolicy,
           typename Lambda,
           typename IndexType>
-void matrix_vector_op(raft::device_resources const& handle,
+void matrix_vector_op(raft::resources const& handle,
                       raft::device_matrix_view<const MatValueType, IndexType, LayoutPolicy> matrix,
                       raft::device_vector_view<const Vec1ValueType, IndexType> vec1,
                       raft::device_vector_view<const Vec2ValueType, IndexType> vec2,
@@ -234,7 +235,7 @@ void matrix_vector_op(raft::device_resources const& handle,
                  rowMajor,
                  bcastAlongRows,
                  op,
-                 handle.get_stream());
+                 resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group matrix_vector_op
diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh
index 317c085673..d45f11524d 100644
--- a/cpp/include/raft/linalg/mean_squared_error.cuh
+++ b/cpp/include/raft/linalg/mean_squared_error.cuh
@@ -20,6 +20,7 @@
 
 #include "detail/mean_squared_error.cuh"
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 
 namespace raft {
 namespace linalg {
@@ -53,14 +54,14 @@ void meanSquaredError(
  * @tparam IndexType Input/Output index type
  * @tparam OutValueType Output data-type
  * @tparam TPB threads-per-block
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] A input raft::device_vector_view
  * @param[in] B input raft::device_vector_view
  * @param[out] out the output mean squared error value of type raft::device_scalar_view
  * @param[in] weight weight to apply to every term in the mean squared error calculation
  */
 template <typename InValueType, typename IndexType, typename OutValueType>
-void mean_squared_error(raft::device_resources const& handle,
+void mean_squared_error(raft::resources const& handle,
                         raft::device_vector_view<const InValueType, IndexType> A,
                         raft::device_vector_view<const InValueType, IndexType> B,
                         raft::device_scalar_view<OutValueType, IndexType> out,
@@ -68,8 +69,12 @@ void mean_squared_error(raft::device_resources const& handle,
 {
   RAFT_EXPECTS(A.size() == B.size(), "Size mismatch between inputs");
 
-  meanSquaredError(
-    out.data_handle(), A.data_handle(), B.data_handle(), A.extent(0), weight, handle.get_stream());
+  meanSquaredError(out.data_handle(),
+                   A.data_handle(),
+                   B.data_handle(),
+                   A.extent(0),
+                   weight,
+                   resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group mean_squared_error
diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh
index bdca641616..3ade108235 100644
--- a/cpp/include/raft/linalg/multiply.cuh
+++ b/cpp/include/raft/linalg/multiply.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/multiply.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
@@ -56,7 +57,7 @@ void multiplyScalar(out_t* out, const in_t* in, in_t scalar, IdxType len, cudaSt
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
  * @tparam ScalarIdxType Index Type of scalar
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in the input buffer
  * @param[out] out the output buffer
  * @param[in] scalar the scalar used in the operations
@@ -68,7 +69,7 @@ template <typename InType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
 void multiply_scalar(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   InType in,
   OutType out,
   raft::host_scalar_view<const typename InType::value_type, ScalarIdxType> scalar)
@@ -85,13 +86,13 @@ void multiply_scalar(
                                                            in.data_handle(),
                                                            *scalar.data_handle(),
                                                            static_cast<std::uint32_t>(out.size()),
-                                                           handle.get_stream());
+                                                           resource::get_cuda_stream(handle));
   } else {
     multiplyScalar<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                            in.data_handle(),
                                                            *scalar.data_handle(),
                                                            static_cast<std::uint64_t>(out.size()),
-                                                           handle.get_stream());
+                                                           resource::get_cuda_stream(handle));
   }
 }
 
diff --git a/cpp/include/raft/linalg/norm.cuh b/cpp/include/raft/linalg/norm.cuh
index 8bc6720b4e..c426250e18 100644
--- a/cpp/include/raft/linalg/norm.cuh
+++ b/cpp/include/raft/linalg/norm.cuh
@@ -20,6 +20,7 @@
 
 #include "detail/norm.cuh"
 #include "linalg_types.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/operators.hpp>
@@ -99,7 +100,7 @@ void colNorm(Type* dots,
  * @tparam LayoutPolicy the layout of input (raft::row_major or raft::col_major)
  * @tparam IdxType Integer type used to for addressing
  * @tparam Lambda device final lambda
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in the input raft::device_matrix_view
  * @param[out] out the output raft::device_vector_view
  * @param[in] type the type of norm to be applied
@@ -111,7 +112,7 @@ template <typename ElementType,
           typename LayoutPolicy,
           typename IndexType,
           typename Lambda = raft::identity_op>
-void norm(raft::device_resources const& handle,
+void norm(raft::resources const& handle,
           raft::device_matrix_view<const ElementType, IndexType, LayoutPolicy> in,
           raft::device_vector_view<ElementType, IndexType> out,
           NormType type,
@@ -132,7 +133,7 @@ void norm(raft::device_resources const& handle,
             in.extent(0),
             type,
             row_major,
-            handle.get_stream(),
+            resource::get_cuda_stream(handle),
             fin_op);
   } else {
     RAFT_EXPECTS(static_cast<IndexType>(out.size()) == in.extent(1),
@@ -143,7 +144,7 @@ void norm(raft::device_resources const& handle,
             in.extent(0),
             type,
             row_major,
-            handle.get_stream(),
+            resource::get_cuda_stream(handle),
             fin_op);
   }
 }
diff --git a/cpp/include/raft/linalg/normalize.cuh b/cpp/include/raft/linalg/normalize.cuh
index 027ebb16e8..86bc597bdc 100644
--- a/cpp/include/raft/linalg/normalize.cuh
+++ b/cpp/include/raft/linalg/normalize.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include "detail/normalize.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/operators.hpp>
 #include <raft/linalg/norm_types.hpp>
@@ -37,7 +38,7 @@ namespace linalg {
  * @tparam MainLambda Type of main_op
  * @tparam ReduceLambda Type of reduce_op
  * @tparam FinalLambda Type of fin_op
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in the input raft::device_matrix_view
  * @param[out] out the output raft::device_matrix_view
  * @param[in] init Initialization value, i.e identity element for the reduction operation
@@ -52,7 +53,7 @@ template <typename ElementType,
           typename MainLambda,
           typename ReduceLambda,
           typename FinalLambda>
-void row_normalize(raft::device_resources const& handle,
+void row_normalize(raft::resources const& handle,
                    raft::device_matrix_view<const ElementType, IndexType, row_major> in,
                    raft::device_matrix_view<ElementType, IndexType, row_major> out,
                    ElementType init,
@@ -73,7 +74,7 @@ void row_normalize(raft::device_resources const& handle,
                               in.extent(1),
                               in.extent(0),
                               init,
-                              handle.get_stream(),
+                              resource::get_cuda_stream(handle),
                               main_op,
                               reduce_op,
                               fin_op,
@@ -85,14 +86,14 @@ void row_normalize(raft::device_resources const& handle,
  *
  * @tparam ElementType Input/Output data type
  * @tparam IndexType Integer type used to for addressing
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in the input raft::device_matrix_view
  * @param[out] out the output raft::device_matrix_view
  * @param[in] norm_type the type of norm to be applied
  * @param[in] eps If the norm is below eps, the row is considered zero and no division is applied
  */
 template <typename ElementType, typename IndexType>
-void row_normalize(raft::device_resources const& handle,
+void row_normalize(raft::resources const& handle,
                    raft::device_matrix_view<const ElementType, IndexType, row_major> in,
                    raft::device_matrix_view<ElementType, IndexType, row_major> out,
                    NormType norm_type,
diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh
index 057d6f6827..26ac1035ca 100644
--- a/cpp/include/raft/linalg/power.cuh
+++ b/cpp/include/raft/linalg/power.cuh
@@ -20,6 +20,7 @@
 
 #include <raft/core/host_mdspan.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/binary_op.cuh>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/util/input_validation.hpp>
@@ -73,7 +74,7 @@ void power(out_t* out, const in_t* in1, const in_t* in2, IdxType len, cudaStream
  * @brief Elementwise power operation on the input buffers
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in1    First Input
  * @param[in] in2    Second Input
  * @param[out] out    Output
@@ -82,7 +83,7 @@ template <typename InType,
           typename OutType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void power(raft::device_resources const& handle, InType in1, InType in2, OutType out)
+void power(raft::resources const& handle, InType in1, InType in2, OutType out)
 {
   using in_value_t  = typename InType::value_type;
   using out_value_t = typename OutType::value_type;
@@ -98,13 +99,13 @@ void power(raft::device_resources const& handle, InType in1, InType in2, OutType
                                                   in1.data_handle(),
                                                   in2.data_handle(),
                                                   static_cast<std::uint32_t>(out.size()),
-                                                  handle.get_stream());
+                                                  resource::get_cuda_stream(handle));
   } else {
     power<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                   in1.data_handle(),
                                                   in2.data_handle(),
                                                   static_cast<std::uint64_t>(out.size()),
-                                                  handle.get_stream());
+                                                  resource::get_cuda_stream(handle));
   }
 }
 
@@ -113,7 +114,7 @@ void power(raft::device_resources const& handle, InType in1, InType in2, OutType
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
  * @tparam ScalarIdxType Index Type of scalar
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in    Input
  * @param[out] out    Output
  * @param[in] scalar    raft::host_scalar_view
@@ -124,7 +125,7 @@ template <typename InType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
 void power_scalar(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   InType in,
   OutType out,
   const raft::host_scalar_view<const typename InType::value_type, ScalarIdxType> scalar)
@@ -141,13 +142,13 @@ void power_scalar(
                                                         in.data_handle(),
                                                         *scalar.data_handle(),
                                                         static_cast<std::uint32_t>(out.size()),
-                                                        handle.get_stream());
+                                                        resource::get_cuda_stream(handle));
   } else {
     powerScalar<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                         in.data_handle(),
                                                         *scalar.data_handle(),
                                                         static_cast<std::uint64_t>(out.size()),
-                                                        handle.get_stream());
+                                                        resource::get_cuda_stream(handle));
   }
 }
 
diff --git a/cpp/include/raft/linalg/qr.cuh b/cpp/include/raft/linalg/qr.cuh
index 948996d0ac..022c382e67 100644
--- a/cpp/include/raft/linalg/qr.cuh
+++ b/cpp/include/raft/linalg/qr.cuh
@@ -74,7 +74,7 @@ void qrGetQR(raft::resources const& handle,
 
 /**
  * @brief Compute the QR decomposition of matrix M and return only the Q matrix.
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M Input raft::device_matrix_view
  * @param[out] Q Output raft::device_matrix_view
  */
@@ -95,7 +95,7 @@ void qr_get_q(raft::resources const& handle,
 
 /**
  * @brief Compute the QR decomposition of matrix M and return both the Q and R matrices.
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M Input raft::device_matrix_view
  * @param[in] Q Output raft::device_matrix_view
  * @param[out] R Output raft::device_matrix_view
diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh
index 06f62f207e..a3d0ef71d0 100644
--- a/cpp/include/raft/linalg/reduce.cuh
+++ b/cpp/include/raft/linalg/reduce.cuh
@@ -20,6 +20,7 @@
 
 #include "detail/reduce.cuh"
 #include "linalg_types.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/operators.hpp>
@@ -105,7 +106,7 @@ void reduce(OutType* dots,
  * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm)
  * It must be a 'callable' supporting the following input and output:
  * <pre>OutType (*FinalLambda)(OutType);</pre>
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] data Input of type raft::device_matrix_view
  * @param[out] dots Output of type raft::device_matrix_view
  * @param[in] init initial value to use for the reduction
@@ -122,7 +123,7 @@ template <typename InElementType,
           typename MainLambda     = raft::identity_op,
           typename ReduceLambda   = raft::add_op,
           typename FinalLambda    = raft::identity_op>
-void reduce(raft::device_resources const& handle,
+void reduce(raft::resources const& handle,
             raft::device_matrix_view<const InElementType, IdxType, LayoutPolicy> data,
             raft::device_vector_view<OutElementType, IdxType> dots,
             OutElementType init,
@@ -152,7 +153,7 @@ void reduce(raft::device_resources const& handle,
          init,
          row_major,
          along_rows,
-         handle.get_stream(),
+         resource::get_cuda_stream(handle),
          inplace,
          main_op,
          reduce_op,
diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
index 71c8cf14a1..6eaf1e2ba7 100644
--- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh
@@ -19,9 +19,10 @@
 #pragma once
 
 #include "detail/reduce_cols_by_key.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace linalg {
@@ -69,7 +70,7 @@ void reduce_cols_by_key(const T* data,
  * @tparam ElementType the input data type (as well as the output reduced matrix)
  * @tparam KeyType data type of the keys
  * @tparam IndexType indexing arithmetic type
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] data the input data (dim = nrows x ncols). This is assumed to be in
  * row-major layout of type raft::device_matrix_view
  * @param[in] keys keys raft::device_vector_view (len = ncols). It is assumed that each key in this
@@ -84,7 +85,7 @@ void reduce_cols_by_key(const T* data,
  */
 template <typename ElementType, typename KeyType = ElementType, typename IndexType = std::uint32_t>
 void reduce_cols_by_key(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ElementType, IndexType, raft::row_major> data,
   raft::device_vector_view<const KeyType, IndexType> keys,
   raft::device_matrix_view<ElementType, IndexType, raft::row_major> out,
@@ -106,7 +107,7 @@ void reduce_cols_by_key(
                      data.extent(0),
                      data.extent(1),
                      nkeys,
-                     handle.get_stream(),
+                     resource::get_cuda_stream(handle),
                      reset_sums);
 }
 
diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
index 0e83c9aa2b..fa624b2191 100644
--- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh
+++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh
@@ -19,9 +19,10 @@
 #pragma once
 
 #include "detail/reduce_rows_by_key.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace linalg {
@@ -136,7 +137,7 @@ void reduce_rows_by_key(const DataIteratorT d_A,
  * @tparam KeyType data-type of keys
  * @tparam WeightType data-type of weights
  * @tparam IndexType index type
- * @param[in]  handle      raft::device_resources
+ * @param[in]  handle      raft::resources
  * @param[in]  d_A         Input raft::device_mdspan (ncols * nrows)
  * @param[in]  d_keys      Keys for each row raft::device_vector_view (1 x nrows)
  * @param[out] d_sums      Row sums by key raft::device_matrix_view (ncols x d_keys)
@@ -148,7 +149,7 @@ void reduce_rows_by_key(const DataIteratorT d_A,
  */
 template <typename ElementType, typename KeyType, typename WeightType, typename IndexType>
 void reduce_rows_by_key(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ElementType, IndexType, raft::row_major> d_A,
   raft::device_vector_view<const KeyType, IndexType> d_keys,
   raft::device_matrix_view<ElementType, IndexType, raft::row_major> d_sums,
@@ -173,7 +174,7 @@ void reduce_rows_by_key(
                        d_A.extent(0),
                        n_unique_keys,
                        d_sums.data_handle(),
-                       handle.get_stream(),
+                       resource::get_cuda_stream(handle),
                        reset_sums);
   } else {
     reduce_rows_by_key(d_A.data_handle(),
@@ -184,7 +185,7 @@ void reduce_rows_by_key(
                        d_A.extent(0),
                        n_unique_keys,
                        d_sums.data_handle(),
-                       handle.get_stream(),
+                       resource::get_cuda_stream(handle),
                        reset_sums);
   }
 }
diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh
index 8a32467873..4a6c058061 100644
--- a/cpp/include/raft/linalg/rsvd.cuh
+++ b/cpp/include/raft/linalg/rsvd.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/rsvd.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 
@@ -47,7 +48,7 @@ namespace linalg {
  * @param stream cuda stream
  */
 template <typename math_t>
-void rsvdFixedRank(raft::device_resources const& handle,
+void rsvdFixedRank(raft::resources const& handle,
                    math_t* M,
                    int n_rows,
                    int n_cols,
@@ -104,7 +105,7 @@ void rsvdFixedRank(raft::device_resources const& handle,
  * @param stream cuda stream
  */
 template <typename math_t>
-void rsvdPerc(raft::device_resources const& handle,
+void rsvdPerc(raft::resources const& handle,
               math_t* M,
               int n_rows,
               int n_cols,
@@ -154,7 +155,7 @@ void rsvdPerc(raft::device_resources const& handle,
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] p no. of upsamples
@@ -164,7 +165,7 @@ void rsvdPerc(raft::device_resources const& handle,
  * raft::col_major
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
-void rsvd_fixed_rank(raft::device_resources const& handle,
+void rsvd_fixed_rank(raft::resources const& handle,
                      raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
                      raft::device_vector_view<ValueType, IndexType> S_vec,
                      IndexType p,
@@ -202,7 +203,7 @@ void rsvd_fixed_rank(raft::device_resources const& handle,
                 false,
                 static_cast<ValueType>(0),
                 0,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
 }
 
 /**
@@ -228,7 +229,7 @@ void rsvd_fixed_rank(Args... args)
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] p no. of upsamples
@@ -239,7 +240,7 @@ void rsvd_fixed_rank(Args... args)
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
 void rsvd_fixed_rank_symmetric(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
   raft::device_vector_view<ValueType, IndexType> S_vec,
   IndexType p,
@@ -277,7 +278,7 @@ void rsvd_fixed_rank_symmetric(
                 false,
                 static_cast<ValueType>(0),
                 0,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
 }
 
 /**
@@ -303,7 +304,7 @@ void rsvd_fixed_rank_symmetric(Args... args)
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] p no. of upsamples
@@ -315,7 +316,7 @@ void rsvd_fixed_rank_symmetric(Args... args)
  * raft::col_major
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
-void rsvd_fixed_rank_jacobi(raft::device_resources const& handle,
+void rsvd_fixed_rank_jacobi(raft::resources const& handle,
                             raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
                             raft::device_vector_view<ValueType, IndexType> S_vec,
                             IndexType p,
@@ -355,7 +356,7 @@ void rsvd_fixed_rank_jacobi(raft::device_resources const& handle,
                 true,
                 tol,
                 max_sweeps,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
 }
 
 /**
@@ -381,7 +382,7 @@ void rsvd_fixed_rank_jacobi(Args... args)
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] p no. of upsamples
@@ -394,7 +395,7 @@ void rsvd_fixed_rank_jacobi(Args... args)
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
 void rsvd_fixed_rank_symmetric_jacobi(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
   raft::device_vector_view<ValueType, IndexType> S_vec,
   IndexType p,
@@ -434,7 +435,7 @@ void rsvd_fixed_rank_symmetric_jacobi(
                 true,
                 tol,
                 max_sweeps,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
 }
 
 /**
@@ -460,7 +461,7 @@ void rsvd_fixed_rank_symmetric_jacobi(Args... args)
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] PC_perc percentage of singular values to be computed
@@ -471,7 +472,7 @@ void rsvd_fixed_rank_symmetric_jacobi(Args... args)
  * raft::col_major
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
-void rsvd_perc(raft::device_resources const& handle,
+void rsvd_perc(raft::resources const& handle,
                raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
                raft::device_vector_view<ValueType, IndexType> S_vec,
                ValueType PC_perc,
@@ -510,7 +511,7 @@ void rsvd_perc(raft::device_resources const& handle,
            false,
            static_cast<ValueType>(0),
            0,
-           handle.get_stream());
+           resource::get_cuda_stream(handle));
 }
 
 /**
@@ -536,7 +537,7 @@ void rsvd_perc(Args... args)
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] PC_perc percentage of singular values to be computed
@@ -547,7 +548,7 @@ void rsvd_perc(Args... args)
  * raft::col_major
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
-void rsvd_perc_symmetric(raft::device_resources const& handle,
+void rsvd_perc_symmetric(raft::resources const& handle,
                          raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
                          raft::device_vector_view<ValueType, IndexType> S_vec,
                          ValueType PC_perc,
@@ -586,7 +587,7 @@ void rsvd_perc_symmetric(raft::device_resources const& handle,
            false,
            static_cast<ValueType>(0),
            0,
-           handle.get_stream());
+           resource::get_cuda_stream(handle));
 }
 
 /**
@@ -612,7 +613,7 @@ void rsvd_perc_symmetric(Args... args)
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] PC_perc percentage of singular values to be computed
@@ -625,7 +626,7 @@ void rsvd_perc_symmetric(Args... args)
  * raft::col_major
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
-void rsvd_perc_jacobi(raft::device_resources const& handle,
+void rsvd_perc_jacobi(raft::resources const& handle,
                       raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
                       raft::device_vector_view<ValueType, IndexType> S_vec,
                       ValueType PC_perc,
@@ -666,7 +667,7 @@ void rsvd_perc_jacobi(raft::device_resources const& handle,
            true,
            tol,
            max_sweeps,
-           handle.get_stream());
+           resource::get_cuda_stream(handle));
 }
 
 /**
@@ -692,7 +693,7 @@ void rsvd_perc_jacobi(Args... args)
  * U_in
  * @tparam VType std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> @c
  * V_in
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] M input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S_vec singular values raft::device_vector_view of shape (K)
  * @param[in] PC_perc percentage of singular values to be computed
@@ -706,7 +707,7 @@ void rsvd_perc_jacobi(Args... args)
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
 void rsvd_perc_symmetric_jacobi(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> M,
   raft::device_vector_view<ValueType, IndexType> S_vec,
   ValueType PC_perc,
@@ -747,7 +748,7 @@ void rsvd_perc_symmetric_jacobi(
            true,
            tol,
            max_sweeps,
-           handle.get_stream());
+           resource::get_cuda_stream(handle));
 }
 
 /**
diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh
index eecc719617..99754c4eb2 100644
--- a/cpp/include/raft/linalg/sqrt.cuh
+++ b/cpp/include/raft/linalg/sqrt.cuh
@@ -20,6 +20,7 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/unary_op.cuh>
 
 namespace raft {
@@ -51,7 +52,7 @@ void sqrt(out_t* out, const in_t* in, IdxType len, cudaStream_t stream)
  * @brief Elementwise sqrt operation
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in     Input
  * @param[out] out    Output
  */
@@ -59,7 +60,7 @@ template <typename InType,
           typename OutType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void sqrt(raft::device_resources const& handle, InType in, OutType out)
+void sqrt(raft::resources const& handle, InType in, OutType out)
 {
   using in_value_t  = typename InType::value_type;
   using out_value_t = typename OutType::value_type;
@@ -72,12 +73,12 @@ void sqrt(raft::device_resources const& handle, InType in, OutType out)
     sqrt<in_value_t, out_value_t, std::uint32_t>(out.data_handle(),
                                                  in.data_handle(),
                                                  static_cast<std::uint32_t>(out.size()),
-                                                 handle.get_stream());
+                                                 resource::get_cuda_stream(handle));
   } else {
     sqrt<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                  in.data_handle(),
                                                  static_cast<std::uint64_t>(out.size()),
-                                                 handle.get_stream());
+                                                 resource::get_cuda_stream(handle));
   }
 }
 
diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh
index 25be368865..f971d0e40b 100644
--- a/cpp/include/raft/linalg/strided_reduction.cuh
+++ b/cpp/include/raft/linalg/strided_reduction.cuh
@@ -20,10 +20,11 @@
 #pragma once
 
 #include "detail/strided_reduction.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 
 #include <type_traits>
 
@@ -112,7 +113,7 @@ void stridedReduction(OutType* dots,
  * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm)
  * It must be a 'callable' supporting the following input and output:
  * <pre>OutType (*FinalLambda)(OutType);</pre>
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] data Input of type raft::device_matrix_view
  * @param[out] dots Output of type raft::device_matrix_view
  * @param[in] init initial value to use for the reduction
@@ -128,7 +129,7 @@ template <typename InValueType,
           typename MainLambda   = raft::identity_op,
           typename ReduceLambda = raft::add_op,
           typename FinalLambda  = raft::identity_op>
-void strided_reduction(raft::device_resources const& handle,
+void strided_reduction(raft::resources const& handle,
                        raft::device_matrix_view<const InValueType, IndexType, LayoutPolicy> data,
                        raft::device_vector_view<OutValueType, IndexType> dots,
                        OutValueType init,
@@ -146,7 +147,7 @@ void strided_reduction(raft::device_resources const& handle,
                      data.extent(1),
                      data.extent(0),
                      init,
-                     handle.get_stream(),
+                     resource::get_cuda_stream(handle),
                      inplace,
                      main_op,
                      reduce_op,
@@ -160,7 +161,7 @@ void strided_reduction(raft::device_resources const& handle,
                      data.extent(0),
                      data.extent(1),
                      init,
-                     handle.get_stream(),
+                     resource::get_cuda_stream(handle),
                      inplace,
                      main_op,
                      reduce_op,
diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh
index cbd6b9df59..688e60a806 100644
--- a/cpp/include/raft/linalg/subtract.cuh
+++ b/cpp/include/raft/linalg/subtract.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include "detail/subtract.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
@@ -97,7 +98,7 @@ void subtractDevScalar(math_t* outDev,
  * @brief Elementwise subtraction operation on the input buffers
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
- * @param handle raft::device_resources
+ * @param handle raft::resources
  * @param[in] in1    First Input
  * @param[in] in2    Second Input
  * @param[out] out    Output
@@ -106,7 +107,7 @@ template <typename InType,
           typename OutType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void subtract(raft::device_resources const& handle, InType in1, InType in2, OutType out)
+void subtract(raft::resources const& handle, InType in1, InType in2, OutType out)
 {
   using in_value_t  = typename InType::value_type;
   using out_value_t = typename OutType::value_type;
@@ -122,13 +123,13 @@ void subtract(raft::device_resources const& handle, InType in1, InType in2, OutT
                                                      in1.data_handle(),
                                                      in2.data_handle(),
                                                      static_cast<std::uint32_t>(out.size()),
-                                                     handle.get_stream());
+                                                     resource::get_cuda_stream(handle));
   } else {
     subtract<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                      in1.data_handle(),
                                                      in2.data_handle(),
                                                      static_cast<std::uint64_t>(out.size()),
-                                                     handle.get_stream());
+                                                     resource::get_cuda_stream(handle));
   }
 }
 
@@ -137,7 +138,7 @@ void subtract(raft::device_resources const& handle, InType in1, InType in2, OutT
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
  * @tparam ScalarIdxType Index Type of scalar
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in    Input
  * @param[out] out    Output
  * @param[in] scalar    raft::device_scalar_view
@@ -148,7 +149,7 @@ template <typename InType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
 void subtract_scalar(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   InType in,
   OutType out,
   raft::device_scalar_view<const typename InType::element_type, ScalarIdxType> scalar)
@@ -166,14 +167,14 @@ void subtract_scalar(
       in.data_handle(),
       scalar.data_handle(),
       static_cast<std::uint32_t>(out.size()),
-      handle.get_stream());
+      resource::get_cuda_stream(handle));
   } else {
     subtractDevScalar<in_value_t, out_value_t, std::uint64_t>(
       out.data_handle(),
       in.data_handle(),
       scalar.data_handle(),
       static_cast<std::uint64_t>(out.size()),
-      handle.get_stream());
+      resource::get_cuda_stream(handle));
   }
 }
 
@@ -182,7 +183,7 @@ void subtract_scalar(
  * @tparam InType    Input Type raft::device_mdspan
  * @tparam OutType   Output Type raft::device_mdspan
  * @tparam ScalarIdxType Index Type of scalar
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in    Input
  * @param[out] out    Output
  * @param[in] scalar    raft::host_scalar_view
@@ -193,7 +194,7 @@ template <typename InType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
 void subtract_scalar(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   InType in,
   OutType out,
   raft::host_scalar_view<const typename InType::element_type, ScalarIdxType> scalar)
@@ -210,13 +211,13 @@ void subtract_scalar(
                                                            in.data_handle(),
                                                            *scalar.data_handle(),
                                                            static_cast<std::uint32_t>(out.size()),
-                                                           handle.get_stream());
+                                                           resource::get_cuda_stream(handle));
   } else {
     subtractScalar<in_value_t, out_value_t, std::uint64_t>(out.data_handle(),
                                                            in.data_handle(),
                                                            *scalar.data_handle(),
                                                            static_cast<std::uint64_t>(out.size()),
-                                                           handle.get_stream());
+                                                           resource::get_cuda_stream(handle));
   }
 }
 
diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh
index 801d271fe9..08f9462ba9 100644
--- a/cpp/include/raft/linalg/svd.cuh
+++ b/cpp/include/raft/linalg/svd.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/svd.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <optional>
 
@@ -41,7 +42,7 @@ namespace linalg {
  * @param stream cuda stream
  */
 template <typename T>
-void svdQR(raft::device_resources const& handle,
+void svdQR(raft::resources const& handle,
            T* in,
            int n_rows,
            int n_cols,
@@ -67,7 +68,7 @@ void svdQR(raft::device_resources const& handle,
 }
 
 template <typename math_t, typename idx_t>
-void svdEig(raft::device_resources const& handle,
+void svdEig(raft::resources const& handle,
             math_t* in,
             idx_t n_rows,
             idx_t n_cols,
@@ -98,7 +99,7 @@ void svdEig(raft::device_resources const& handle,
  * @param stream cuda stream
  */
 template <typename math_t>
-void svdJacobi(raft::device_resources const& handle,
+void svdJacobi(raft::resources const& handle,
                math_t* in,
                int n_rows,
                int n_cols,
@@ -139,7 +140,7 @@ void svdJacobi(raft::device_resources const& handle,
  * @param stream cuda stream
  */
 template <typename math_t>
-void svdReconstruction(raft::device_resources const& handle,
+void svdReconstruction(raft::resources const& handle,
                        math_t* U,
                        math_t* S,
                        math_t* V,
@@ -167,7 +168,7 @@ void svdReconstruction(raft::device_resources const& handle,
  * @param stream cuda stream
  */
 template <typename math_t>
-bool evaluateSVDByL2Norm(raft::device_resources const& handle,
+bool evaluateSVDByL2Norm(raft::resources const& handle,
                          math_t* A_d,
                          math_t* U,
                          math_t* S_vec,
@@ -191,7 +192,7 @@ bool evaluateSVDByL2Norm(raft::device_resources const& handle,
  * matrix using QR decomposition
  * @tparam ValueType value type of parameters
  * @tparam IndexType index type of parameters
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] sing_vals singular values raft::device_vector_view of shape (K)
  * @param[out] U std::optional left singular values of raft::device_matrix_view with layout
@@ -201,7 +202,7 @@ bool evaluateSVDByL2Norm(raft::device_resources const& handle,
  */
 template <typename ValueType, typename IndexType>
 void svd_qr(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
   raft::device_vector_view<ValueType, IndexType> sing_vals,
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> U = std::nullopt,
@@ -230,7 +231,7 @@ void svd_qr(
         false,
         U.has_value(),
         V.has_value(),
-        handle.get_stream());
+        resource::get_cuda_stream(handle));
 }
 
 /**
@@ -241,7 +242,7 @@ void svd_qr(
  * Please see above for documentation of `svd_qr`.
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
-void svd_qr(raft::device_resources const& handle,
+void svd_qr(raft::resources const& handle,
             raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
             raft::device_vector_view<ValueType, IndexType> sing_vals,
             UType&& U_in = std::nullopt,
@@ -260,7 +261,7 @@ void svd_qr(raft::device_resources const& handle,
  * matrix using QR decomposition. Right singular vector matrix is transposed before returning
  * @tparam ValueType value type of parameters
  * @tparam IndexType index type of parameters
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] sing_vals singular values raft::device_vector_view of shape (K)
  * @param[out] U std::optional left singular values of raft::device_matrix_view with layout
@@ -270,7 +271,7 @@ void svd_qr(raft::device_resources const& handle,
  */
 template <typename ValueType, typename IndexType>
 void svd_qr_transpose_right_vec(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
   raft::device_vector_view<ValueType, IndexType> sing_vals,
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> U = std::nullopt,
@@ -299,7 +300,7 @@ void svd_qr_transpose_right_vec(
         true,
         U.has_value(),
         V.has_value(),
-        handle.get_stream());
+        resource::get_cuda_stream(handle));
 }
 
 /**
@@ -311,7 +312,7 @@ void svd_qr_transpose_right_vec(
  */
 template <typename ValueType, typename IndexType, typename UType, typename VType>
 void svd_qr_transpose_right_vec(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
   raft::device_vector_view<ValueType, IndexType> sing_vals,
   UType&& U_in = std::nullopt,
@@ -328,7 +329,7 @@ void svd_qr_transpose_right_vec(
 /**
  * @brief singular value decomposition (SVD) on a column major
  * matrix using Eigen decomposition. A square symmetric covariance matrix is constructed for the SVD
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in input raft::device_matrix_view with layout raft::col_major of shape (M, N)
  * @param[out] S singular values raft::device_vector_view of shape (K)
  * @param[out] V right singular values of raft::device_matrix_view with layout
@@ -338,7 +339,7 @@ void svd_qr_transpose_right_vec(
  */
 template <typename ValueType, typename IndexType>
 void svd_eig(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
   raft::device_vector_view<ValueType, IndexType> S,
   raft::device_matrix_view<ValueType, IndexType, raft::col_major> V,
@@ -360,11 +361,11 @@ void svd_eig(
          left_sing_vecs_ptr,
          V.data_handle(),
          U.has_value(),
-         handle.get_stream());
+         resource::get_cuda_stream(handle));
 }
 
 template <typename ValueType, typename IndexType, typename UType>
-void svd_eig(raft::device_resources const& handle,
+void svd_eig(raft::resources const& handle,
              raft::device_matrix_view<const ValueType, IndexType, raft::col_major> in,
              raft::device_vector_view<ValueType, IndexType> S,
              raft::device_matrix_view<ValueType, IndexType, raft::col_major> V,
@@ -378,7 +379,7 @@ void svd_eig(raft::device_resources const& handle,
 /**
  * @brief reconstruct a matrix use left and right singular vectors and
  * singular values
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] U left singular values of raft::device_matrix_view with layout
  * raft::col_major and dimensions (m, k)
  * @param[in] S square matrix with singular values on its diagonal of shape (k, k)
@@ -387,7 +388,7 @@ void svd_eig(raft::device_resources const& handle,
  * @param[out] out output raft::device_matrix_view with layout raft::col_major of shape (m, n)
  */
 template <typename ValueType, typename IndexType>
-void svd_reconstruction(raft::device_resources const& handle,
+void svd_reconstruction(raft::resources const& handle,
                         raft::device_matrix_view<const ValueType, IndexType, raft::col_major> U,
                         raft::device_matrix_view<const ValueType, IndexType, raft::col_major> S,
                         raft::device_matrix_view<const ValueType, IndexType, raft::col_major> V,
@@ -410,7 +411,7 @@ void svd_reconstruction(raft::device_resources const& handle,
                     out.extent(0),
                     out.extent(1),
                     S.extent(0),
-                    handle.get_stream());
+                    resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group svd
diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh
index ce95e98499..f46133abd9 100644
--- a/cpp/include/raft/linalg/ternary_op.cuh
+++ b/cpp/include/raft/linalg/ternary_op.cuh
@@ -20,7 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/map.cuh>
 
 namespace raft {
@@ -61,7 +61,7 @@ void ternaryOp(out_t* out,
  * @tparam InType Input Type raft::device_mdspan
  * @tparam Lambda the device-lambda performing the actual operation
  * @tparam OutType Output Type raft::device_mdspan
- * @param[in] handle raft::device_resources
+ * @param[in] handle raft::resources
  * @param[in] in1 First input
  * @param[in] in2 Second input
  * @param[in] in3 Third input
@@ -76,7 +76,7 @@ template <typename InType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
 void ternary_op(
-  raft::device_resources const& handle, InType in1, InType in2, InType in3, OutType out, Lambda op)
+  raft::resources const& handle, InType in1, InType in2, InType in3, OutType out, Lambda op)
 {
   return map(handle, in1, in2, in3, out, op);
 }
diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh
index 58ff2f6bd6..47a432f415 100644
--- a/cpp/include/raft/linalg/unary_op.cuh
+++ b/cpp/include/raft/linalg/unary_op.cuh
@@ -19,7 +19,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/map.cuh>
 
 namespace raft {
@@ -97,7 +98,7 @@ template <typename InType,
           typename OutType,
           typename = raft::enable_if_input_device_mdspan<InType>,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void unary_op(raft::device_resources const& handle, InType in, OutType out, Lambda op)
+void unary_op(raft::resources const& handle, InType in, OutType out, Lambda op)
 {
   return map(handle, in, out, op);
 }
@@ -117,9 +118,9 @@ void unary_op(raft::device_resources const& handle, InType in, OutType out, Lamb
 template <typename OutType,
           typename Lambda,
           typename = raft::enable_if_output_device_mdspan<OutType>>
-void write_only_unary_op(const raft::device_resources& handle, OutType out, Lambda op)
+void write_only_unary_op(const raft::resources& handle, OutType out, Lambda op)
 {
-  return writeOnlyUnaryOp(out.data_handle(), out.size(), op, handle.get_stream());
+  return writeOnlyUnaryOp(out.data_handle(), out.size(), op, resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group unary_op
diff --git a/cpp/include/raft/matrix/argmax.cuh b/cpp/include/raft/matrix/argmax.cuh
index 433c161079..e6df03567f 100644
--- a/cpp/include/raft/matrix/argmax.cuh
+++ b/cpp/include/raft/matrix/argmax.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/math.cuh>
 
 namespace raft::matrix {
@@ -33,14 +34,17 @@ namespace raft::matrix {
  * @param[out] out: output vector of size n_rows
  */
 template <typename math_t, typename idx_t, typename matrix_idx_t>
-void argmax(raft::device_resources const& handle,
+void argmax(raft::resources const& handle,
             raft::device_matrix_view<const math_t, matrix_idx_t, row_major> in,
             raft::device_vector_view<idx_t, matrix_idx_t> out)
 {
   RAFT_EXPECTS(out.extent(0) == in.extent(0),
                "Size of output vector must equal number of rows in input matrix.");
-  detail::argmax(
-    in.data_handle(), in.extent(1), in.extent(0), out.data_handle(), handle.get_stream());
+  detail::argmax(in.data_handle(),
+                 in.extent(1),
+                 in.extent(0),
+                 out.data_handle(),
+                 resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group argmax
diff --git a/cpp/include/raft/matrix/argmin.cuh b/cpp/include/raft/matrix/argmin.cuh
index 31ef0c1c1b..5e88b68cd5 100644
--- a/cpp/include/raft/matrix/argmin.cuh
+++ b/cpp/include/raft/matrix/argmin.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/math.cuh>
 
 namespace raft::matrix {
@@ -33,14 +34,17 @@ namespace raft::matrix {
  * @param[out] out: output vector of size n_rows
  */
 template <typename math_t, typename idx_t, typename matrix_idx_t>
-void argmin(raft::device_resources const& handle,
+void argmin(raft::resources const& handle,
             raft::device_matrix_view<const math_t, matrix_idx_t, row_major> in,
             raft::device_vector_view<idx_t, matrix_idx_t> out)
 {
   RAFT_EXPECTS(out.extent(0) == in.extent(0),
                "Size of output vector must equal number of rows in input matrix.");
-  detail::argmin(
-    in.data_handle(), in.extent(1), in.extent(0), out.data_handle(), handle.get_stream());
+  detail::argmin(in.data_handle(),
+                 in.extent(1),
+                 in.extent(0),
+                 out.data_handle(),
+                 resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group argmin
diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh
index 6546a48279..887741ad71 100644
--- a/cpp/include/raft/matrix/col_wise_sort.cuh
+++ b/cpp/include/raft/matrix/col_wise_sort.cuh
@@ -20,6 +20,7 @@
 
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/columnWiseSort.cuh>
 
 namespace raft::matrix {
@@ -71,7 +72,7 @@ void sort_cols_per_row(const InType* in,
  * @param[out] sorted_keys_opt: std::optional, output matrix for sorted keys (input)
  */
 template <typename in_t, typename out_t, typename matrix_idx_t, typename sorted_keys_t>
-void sort_cols_per_row(raft::device_resources const& handle,
+void sort_cols_per_row(raft::resources const& handle,
                        raft::device_matrix_view<const in_t, matrix_idx_t, raft::row_major> in,
                        raft::device_matrix_view<out_t, matrix_idx_t, raft::row_major> out,
                        sorted_keys_t&& sorted_keys_opt)
@@ -100,7 +101,7 @@ void sort_cols_per_row(raft::device_resources const& handle,
                                          alloc_workspace,
                                          (void*)nullptr,
                                          workspace_size,
-                                         handle.get_stream(),
+                                         resource::get_cuda_stream(handle),
                                          keys);
 
   if (alloc_workspace) {
@@ -113,7 +114,7 @@ void sort_cols_per_row(raft::device_resources const& handle,
                                            alloc_workspace,
                                            (void*)workspace.data_handle(),
                                            workspace_size,
-                                           handle.get_stream(),
+                                           resource::get_cuda_stream(handle),
                                            keys);
   }
 }
diff --git a/cpp/include/raft/matrix/copy.cuh b/cpp/include/raft/matrix/copy.cuh
index e4e5526e71..be83a4a19e 100644
--- a/cpp/include/raft/matrix/copy.cuh
+++ b/cpp/include/raft/matrix/copy.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 #include <raft/util/input_validation.hpp>
 
@@ -39,7 +40,7 @@ namespace raft::matrix {
  * @param[in] indices of the rows to be copied
  */
 template <typename m_t, typename idx_t, typename layout>
-void copy_rows(raft::device_resources const& handle,
+void copy_rows(raft::resources const& handle,
                raft::device_matrix_view<const m_t, idx_t, layout> in,
                raft::device_matrix_view<m_t, idx_t, layout> out,
                raft::device_vector_view<const idx_t, idx_t> indices)
@@ -54,7 +55,7 @@ void copy_rows(raft::device_resources const& handle,
                    out.data_handle(),
                    indices.data_handle(),
                    indices.extent(0),
-                   handle.get_stream(),
+                   resource::get_cuda_stream(handle),
                    raft::is_row_major(in));
 }
 
@@ -65,15 +66,17 @@ void copy_rows(raft::device_resources const& handle,
  * @param[out] out: output matrix
  */
 template <typename m_t, typename matrix_idx_t>
-void copy(raft::device_resources const& handle,
+void copy(raft::resources const& handle,
           raft::device_matrix_view<const m_t, matrix_idx_t, row_major> in,
           raft::device_matrix_view<m_t, matrix_idx_t, row_major> out)
 {
   RAFT_EXPECTS(in.extent(0) == out.extent(0) && in.extent(1) == out.extent(1),
                "Input and output matrix shapes must match.");
 
-  raft::copy_async(
-    out.data_handle(), in.data_handle(), in.extent(0) * out.extent(1), handle.get_stream());
+  raft::copy_async(out.data_handle(),
+                   in.data_handle(),
+                   in.extent(0) * out.extent(1),
+                   resource::get_cuda_stream(handle));
 }
 
 /**
@@ -83,15 +86,17 @@ void copy(raft::device_resources const& handle,
  * @param[out] out: output matrix
  */
 template <typename m_t, typename matrix_idx_t>
-void copy(raft::device_resources const& handle,
+void copy(raft::resources const& handle,
           raft::device_matrix_view<const m_t, matrix_idx_t, col_major> in,
           raft::device_matrix_view<m_t, matrix_idx_t, col_major> out)
 {
   RAFT_EXPECTS(in.extent(0) == out.extent(0) && in.extent(1) == out.extent(1),
                "Input and output matrix shapes must match.");
 
-  raft::copy_async(
-    out.data_handle(), in.data_handle(), in.extent(0) * out.extent(1), handle.get_stream());
+  raft::copy_async(out.data_handle(),
+                   in.data_handle(),
+                   in.extent(0) * out.extent(1),
+                   resource::get_cuda_stream(handle));
 }
 
 /**
@@ -102,7 +107,7 @@ void copy(raft::device_resources const& handle,
  * @param out: output matrix
  */
 template <typename m_t, typename idx_t>
-void trunc_zero_origin(raft::device_resources const& handle,
+void trunc_zero_origin(raft::resources const& handle,
                        raft::device_matrix_view<const m_t, idx_t, col_major> in,
                        raft::device_matrix_view<m_t, idx_t, col_major> out)
 {
@@ -114,7 +119,7 @@ void trunc_zero_origin(raft::device_resources const& handle,
                                       out.data_handle(),
                                       out.extent(0),
                                       out.extent(1),
-                                      handle.get_stream());
+                                      resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group matrix_copy
diff --git a/cpp/include/raft/matrix/detail/math.cuh b/cpp/include/raft/matrix/detail/math.cuh
index 96398e9c74..d2707e1254 100644
--- a/cpp/include/raft/matrix/detail/math.cuh
+++ b/cpp/include/raft/matrix/detail/math.cuh
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 #include <cub/cub.cuh>
 #include <raft/core/operators.hpp>
@@ -194,7 +194,7 @@ void setValue(math_t* out, const math_t* in, math_t scalar, int len, cudaStream_
 
 template <typename math_t, typename IdxType = int>
 void ratio(
-  raft::device_resources const& handle, math_t* src, math_t* dest, IdxType len, cudaStream_t stream)
+  raft::resources const& handle, math_t* src, math_t* dest, IdxType len, cudaStream_t stream)
 {
   auto d_src  = src;
   auto d_dest = dest;
diff --git a/cpp/include/raft/matrix/detail/matrix.cuh b/cpp/include/raft/matrix/detail/matrix.cuh
index ef3a873d90..6b6c00c391 100644
--- a/cpp/include/raft/matrix/detail/matrix.cuh
+++ b/cpp/include/raft/matrix/detail/matrix.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cublas_handle.hpp>
 #include <raft/util/cache_util.cuh>
 #include <raft/util/cuda_utils.cuh>
 
@@ -28,7 +29,7 @@
 #include <cstddef>
 #include <cuda_runtime.h>
 #include <cusolverDn.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/util/cudart_utils.hpp>
 
@@ -299,9 +300,9 @@ void getDiagonalInverseMatrix(m_t* in, idx_t len, cudaStream_t stream)
 }
 
 template <typename m_t, typename idx_t = int>
-m_t getL2Norm(raft::device_resources const& handle, const m_t* in, idx_t size, cudaStream_t stream)
+m_t getL2Norm(raft::resources const& handle, const m_t* in, idx_t size, cudaStream_t stream)
 {
-  cublasHandle_t cublasH = handle.get_cublas_handle();
+  cublasHandle_t cublasH = resource::get_cublas_handle(handle);
   m_t normval            = 0;
   RAFT_EXPECTS(
     std::is_integral_v<idx_t> && (std::size_t)size <= (std::size_t)std::numeric_limits<int>::max(),
diff --git a/cpp/include/raft/matrix/detail/print.hpp b/cpp/include/raft/matrix/detail/print.hpp
index 814c6a0b4b..0b93819b97 100644
--- a/cpp/include/raft/matrix/detail/print.hpp
+++ b/cpp/include/raft/matrix/detail/print.hpp
@@ -28,7 +28,7 @@
 #include <cstddef>
 #include <cuda_runtime.h>
 #include <cusolverDn.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/util/cudart_utils.hpp>
 
diff --git a/cpp/include/raft/matrix/detail/select_radix.cuh b/cpp/include/raft/matrix/detail/select_radix.cuh
index b7d02d6b52..edde924892 100644
--- a/cpp/include/raft/matrix/detail/select_radix.cuh
+++ b/cpp/include/raft/matrix/detail/select_radix.cuh
@@ -1128,7 +1128,8 @@ void select_k(const T* in,
     } else {
       auto out_idx_view =
         raft::make_device_vector_view(out_idx, static_cast<size_t>(len) * batch_size);
-      raft::device_resources handle(stream);
+      raft::resources handle;
+      resource::set_cuda_stream(handle, stream);
       raft::linalg::map_offset(handle, out_idx_view, raft::mod_const_op<IdxT>(len));
     }
     return;
diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh
index 7710789bfe..89950c2e14 100644
--- a/cpp/include/raft/matrix/gather.cuh
+++ b/cpp/include/raft/matrix/gather.cuh
@@ -17,7 +17,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/detail/gather.cuh>
 #include <raft/util/itertools.hpp>
 
@@ -210,7 +211,7 @@ template <typename matrix_t,
           typename map_t,
           typename idx_t,
           typename map_xform_t = raft::identity_op>
-void gather(const raft::device_resources& handle,
+void gather(const raft::resources& handle,
             raft::device_matrix_view<const matrix_t, idx_t, row_major> in,
             raft::device_vector_view<const map_t, idx_t> map,
             raft::device_matrix_view<matrix_t, idx_t, row_major> out,
@@ -229,7 +230,7 @@ void gather(const raft::device_resources& handle,
     map.extent(0),
     out.data_handle(),
     transform_op,
-    handle.get_stream());
+    resource::get_cuda_stream(handle));
 }
 
 /**
@@ -261,7 +262,7 @@ template <typename matrix_t,
           typename unary_pred_t,
           typename idx_t,
           typename map_xform_t = raft::identity_op>
-void gather_if(const raft::device_resources& handle,
+void gather_if(const raft::resources& handle,
                raft::device_matrix_view<const matrix_t, idx_t, row_major> in,
                raft::device_matrix_view<matrix_t, idx_t, row_major> out,
                raft::device_vector_view<const map_t, idx_t> map,
@@ -285,7 +286,7 @@ void gather_if(const raft::device_resources& handle,
                     out.data_handle(),
                     pred_op,
                     transform_op,
-                    handle.get_stream());
+                    resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group matrix_gather
diff --git a/cpp/include/raft/matrix/init.cuh b/cpp/include/raft/matrix/init.cuh
index 9611e044f4..2b35dcc1be 100644
--- a/cpp/include/raft/matrix/init.cuh
+++ b/cpp/include/raft/matrix/init.cuh
@@ -18,6 +18,7 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/map.cuh>
 #include <raft/matrix/detail/math.cuh>
 
@@ -39,7 +40,7 @@ namespace raft::matrix {
  * @param[in] scalar scalar value to fill matrix elements
  */
 template <typename math_t, typename extents, typename layout>
-void fill(raft::device_resources const& handle,
+void fill(raft::resources const& handle,
           raft::device_mdspan<const math_t, extents, layout> in,
           raft::device_mdspan<math_t, extents, layout> out,
           raft::host_scalar_view<math_t> scalar)
@@ -47,8 +48,11 @@ void fill(raft::device_resources const& handle,
   RAFT_EXPECTS(raft::is_row_or_column_major(out), "Data layout not supported");
   RAFT_EXPECTS(in.size() == out.size(), "Input and output matrices must be the same size.");
   RAFT_EXPECTS(scalar.data_handle() != nullptr, "Empty scalar");
-  detail::setValue(
-    out.data_handle(), in.data_handle(), *(scalar.data_handle()), in.size(), handle.get_stream());
+  detail::setValue(out.data_handle(),
+                   in.data_handle(),
+                   *(scalar.data_handle()),
+                   in.size(),
+                   resource::get_cuda_stream(handle));
 }
 
 /**
@@ -61,7 +65,7 @@ void fill(raft::device_resources const& handle,
  * @param[in] scalar scalar value to fill matrix elements
  */
 template <typename math_t, typename extents, typename layout>
-void fill(raft::device_resources const& handle,
+void fill(raft::resources const& handle,
           raft::device_mdspan<math_t, extents, layout> inout,
           math_t scalar)
 {
diff --git a/cpp/include/raft/matrix/linewise_op.cuh b/cpp/include/raft/matrix/linewise_op.cuh
index f8e3555d9d..cbcd2e7091 100644
--- a/cpp/include/raft/matrix/linewise_op.cuh
+++ b/cpp/include/raft/matrix/linewise_op.cuh
@@ -17,7 +17,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/detail/linewise_op.cuh>
 
 namespace raft::matrix {
@@ -62,7 +63,7 @@ template <typename m_t,
           typename Lambda,
           typename... vec_t,
           typename = raft::enable_if_device_mdspan<vec_t...>>
-void linewise_op(raft::device_resources const& handle,
+void linewise_op(raft::resources const& handle,
                  raft::device_matrix_view<const m_t, idx_t, layout> in,
                  raft::device_matrix_view<m_t, idx_t, layout> out,
                  const bool alongLines,
@@ -87,7 +88,7 @@ void linewise_op(raft::device_resources const& handle,
                                                      nLines,
                                                      alongLines,
                                                      op,
-                                                     handle.get_stream(),
+                                                     resource::get_cuda_stream(handle),
                                                      vecs.data_handle()...);
 }
 
@@ -97,7 +98,7 @@ template <typename m_t,
           typename Lambda,
           typename... vec_t,
           typename = raft::enable_if_device_mdspan<vec_t...>>
-void linewise_op(raft::device_resources const& handle,
+void linewise_op(raft::resources const& handle,
                  raft::device_aligned_matrix_view<const m_t, idx_t, layout> in,
                  raft::device_aligned_matrix_view<m_t, idx_t, layout> out,
                  const bool alongLines,
@@ -116,8 +117,14 @@ void linewise_op(raft::device_resources const& handle,
   RAFT_EXPECTS(out.extent(0) == in.extent(0) && out.extent(1) == in.extent(1),
                "Input and output must have the same shape.");
 
-  detail::MatrixLinewiseOp<16, 256>::runPadded<m_t, idx_t>(
-    out, in, lineLen, nLines, alongLines, op, handle.get_stream(), vecs.data_handle()...);
+  detail::MatrixLinewiseOp<16, 256>::runPadded<m_t, idx_t>(out,
+                                                           in,
+                                                           lineLen,
+                                                           nLines,
+                                                           alongLines,
+                                                           op,
+                                                           resource::get_cuda_stream(handle),
+                                                           vecs.data_handle()...);
 }
 
 /** @} */  // end of group linewise_op
diff --git a/cpp/include/raft/matrix/math.cuh b/cpp/include/raft/matrix/math.cuh
index 7cbc212d75..598ac60faf 100644
--- a/cpp/include/raft/matrix/math.cuh
+++ b/cpp/include/raft/matrix/math.cuh
@@ -295,7 +295,7 @@ void setValue(math_t* out, const math_t* in, math_t scalar, int len, cudaStream_
  */
 template <typename math_t, typename IdxType = int>
 void ratio(
-  raft::device_resources const& handle, math_t* src, math_t* dest, IdxType len, cudaStream_t stream)
+  raft::resources const& handle, math_t* src, math_t* dest, IdxType len, cudaStream_t stream)
 {
   detail::ratio(handle, src, dest, len, stream);
 }
diff --git a/cpp/include/raft/matrix/matrix.cuh b/cpp/include/raft/matrix/matrix.cuh
index 4e549a4ec5..bc553011c0 100644
--- a/cpp/include/raft/matrix/matrix.cuh
+++ b/cpp/include/raft/matrix/matrix.cuh
@@ -31,6 +31,7 @@
 #include "detail/linewise_op.cuh"
 #include "detail/matrix.cuh"
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/common/nvtx.hpp>
 
@@ -88,15 +89,17 @@ void copy(const m_t* in, m_t* out, idx_t n_rows, idx_t n_cols, cudaStream_t stre
  * @param[out] out: output matrix
  */
 template <typename m_t, typename idx_t = int, typename matrix_idx_t>
-void copy(raft::device_resources const& handle,
+void copy(raft::resources const& handle,
           raft::device_matrix_view<const m_t, matrix_idx_t, col_major> in,
           raft::device_matrix_view<m_t, matrix_idx_t, col_major> out)
 {
   RAFT_EXPECTS(in.extent(0) == out.extent(0) && in.extent(1) == out.extent(1),
                "Input and output matrix shapes must match.");
 
-  raft::copy_async(
-    out.data_handle(), in.data_handle(), in.extent(0) * out.extent(1), handle.get_stream());
+  raft::copy_async(out.data_handle(),
+                   in.data_handle(),
+                   in.extent(0) * out.extent(1),
+                   resource::get_cuda_stream(handle));
 }
 
 /**
@@ -252,7 +255,7 @@ void getDiagonalInverseMatrix(m_t* in, idx_t len, cudaStream_t stream)
  * @param stream: cuda stream
  */
 template <typename m_t, typename idx_t = int>
-m_t getL2Norm(raft::device_resources const& handle, m_t* in, idx_t size, cudaStream_t stream)
+m_t getL2Norm(raft::resources const& handle, m_t* in, idx_t size, cudaStream_t stream)
 {
   return detail::getL2Norm(handle, in, size, stream);
 }
diff --git a/cpp/include/raft/matrix/norm.cuh b/cpp/include/raft/matrix/norm.cuh
index eb94a19669..ecfdb19191 100644
--- a/cpp/include/raft/matrix/norm.cuh
+++ b/cpp/include/raft/matrix/norm.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 
 namespace raft::matrix {
@@ -33,9 +34,9 @@ namespace raft::matrix {
  * @returns matrix l2 norm
  */
 template <typename m_t, typename idx_t>
-m_t l2_norm(raft::device_resources const& handle, raft::device_mdspan<const m_t, idx_t> in)
+m_t l2_norm(raft::resources const& handle, raft::device_mdspan<const m_t, idx_t> in)
 {
-  return detail::getL2Norm(handle, in.data_handle(), in.size(), handle.get_stream());
+  return detail::getL2Norm(handle, in.data_handle(), in.size(), resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group matrix_norm
diff --git a/cpp/include/raft/matrix/power.cuh b/cpp/include/raft/matrix/power.cuh
index c7c3757193..866889866c 100644
--- a/cpp/include/raft/matrix/power.cuh
+++ b/cpp/include/raft/matrix/power.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/math.cuh>
 
 namespace raft::matrix {
@@ -37,13 +38,14 @@ namespace raft::matrix {
  * @param[in] scalar: every element is multiplied with scalar.
  */
 template <typename math_t, typename idx_t, typename layout>
-void weighted_power(raft::device_resources const& handle,
+void weighted_power(raft::resources const& handle,
                     raft::device_matrix_view<const math_t, idx_t, layout> in,
                     raft::device_matrix_view<math_t, idx_t, layout> out,
                     math_t scalar)
 {
   RAFT_EXPECTS(in.size() == out.size(), "Size of input and output matrices must be equal");
-  detail::power(in.data_handle(), out.data_handle(), scalar, in.size(), handle.get_stream());
+  detail::power(
+    in.data_handle(), out.data_handle(), scalar, in.size(), resource::get_cuda_stream(handle));
 }
 
 /**
@@ -56,11 +58,11 @@ void weighted_power(raft::device_resources const& handle,
  * @param[in] scalar: every element is multiplied with scalar.
  */
 template <typename math_t, typename idx_t, typename layout>
-void weighted_power(raft::device_resources const& handle,
+void weighted_power(raft::resources const& handle,
                     raft::device_matrix_view<math_t, idx_t, layout> inout,
                     math_t scalar)
 {
-  detail::power(inout.data_handle(), scalar, inout.size(), handle.get_stream());
+  detail::power(inout.data_handle(), scalar, inout.size(), resource::get_cuda_stream(handle));
 }
 
 /**
@@ -72,10 +74,9 @@ void weighted_power(raft::device_resources const& handle,
  * @param[inout] inout: input matrix and also the result is stored
  */
 template <typename math_t, typename idx_t, typename layout>
-void power(raft::device_resources const& handle,
-           raft::device_matrix_view<math_t, idx_t, layout> inout)
+void power(raft::resources const& handle, raft::device_matrix_view<math_t, idx_t, layout> inout)
 {
-  detail::power<math_t>(inout.data_handle(), inout.size(), handle.get_stream());
+  detail::power<math_t>(inout.data_handle(), inout.size(), resource::get_cuda_stream(handle));
 }
 
 /**
@@ -89,12 +90,13 @@ void power(raft::device_resources const& handle,
  * @{
  */
 template <typename math_t, typename idx_t, typename layout>
-void power(raft::device_resources const& handle,
+void power(raft::resources const& handle,
            raft::device_matrix_view<const math_t, idx_t, layout> in,
            raft::device_matrix_view<math_t, idx_t, layout> out)
 {
   RAFT_EXPECTS(in.size() == out.size(), "Input and output matrices must be same size.");
-  detail::power<math_t>(in.data_handle(), out.data_handle(), in.size(), handle.get_stream());
+  detail::power<math_t>(
+    in.data_handle(), out.data_handle(), in.size(), resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group matrix_power
diff --git a/cpp/include/raft/matrix/print.cuh b/cpp/include/raft/matrix/print.cuh
index f2c2653211..8c5ddb931c 100644
--- a/cpp/include/raft/matrix/print.cuh
+++ b/cpp/include/raft/matrix/print.cuh
@@ -18,6 +18,7 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 #include <raft/matrix/matrix_types.hpp>
 
@@ -37,7 +38,7 @@ namespace raft::matrix {
  * @param[in] separators: horizontal and vertical separator characters
  */
 template <typename m_t, typename idx_t>
-void print(raft::device_resources const& handle,
+void print(raft::resources const& handle,
            raft::device_matrix_view<const m_t, idx_t, col_major> in,
            print_separators& separators)
 {
@@ -46,7 +47,7 @@ void print(raft::device_resources const& handle,
                 in.extent(1),
                 separators.horizontal,
                 separators.vertical,
-                handle.get_stream());
+                resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group matrix_print
diff --git a/cpp/include/raft/matrix/ratio.cuh b/cpp/include/raft/matrix/ratio.cuh
index cd96d1ffbc..93e1447c05 100644
--- a/cpp/include/raft/matrix/ratio.cuh
+++ b/cpp/include/raft/matrix/ratio.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 
 namespace raft::matrix {
@@ -36,12 +37,13 @@ namespace raft::matrix {
  * @param[out] dest: output matrix. The result is stored in the dest matrix
  */
 template <typename math_t, typename idx_t, typename layout>
-void ratio(raft::device_resources const& handle,
+void ratio(raft::resources const& handle,
            raft::device_matrix_view<const math_t, idx_t, layout> src,
            raft::device_matrix_view<math_t, idx_t, layout> dest)
 {
   RAFT_EXPECTS(src.size() == dest.size(), "Input and output matrices must be the same size.");
-  detail::ratio(handle, src.data_handle(), dest.data_handle(), src.size(), handle.get_stream());
+  detail::ratio(
+    handle, src.data_handle(), dest.data_handle(), src.size(), resource::get_cuda_stream(handle));
 }
 
 /**
@@ -53,11 +55,13 @@ void ratio(raft::device_resources const& handle,
  * @param[inout] inout: input matrix
  */
 template <typename math_t, typename idx_t, typename layout>
-void ratio(raft::device_resources const& handle,
-           raft::device_matrix_view<math_t, idx_t, layout> inout)
+void ratio(raft::resources const& handle, raft::device_matrix_view<math_t, idx_t, layout> inout)
 {
-  detail::ratio(
-    handle, inout.data_handle(), inout.data_handle(), inout.size(), handle.get_stream());
+  detail::ratio(handle,
+                inout.data_handle(),
+                inout.data_handle(),
+                inout.size(),
+                resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group matrix_ratio
diff --git a/cpp/include/raft/matrix/reciprocal.cuh b/cpp/include/raft/matrix/reciprocal.cuh
index aa2c48e143..0ecdc55762 100644
--- a/cpp/include/raft/matrix/reciprocal.cuh
+++ b/cpp/include/raft/matrix/reciprocal.cuh
@@ -18,6 +18,7 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/math.cuh>
 
 namespace raft::matrix {
@@ -40,7 +41,7 @@ namespace raft::matrix {
  * @{
  */
 template <typename math_t, typename idx_t, typename layout>
-void reciprocal(raft::device_resources const& handle,
+void reciprocal(raft::resources const& handle,
                 raft::device_matrix_view<const math_t, idx_t, layout> in,
                 raft::device_matrix_view<math_t, idx_t, layout> out,
                 raft::host_scalar_view<math_t> scalar,
@@ -52,7 +53,7 @@ void reciprocal(raft::device_resources const& handle,
                              out.data_handle(),
                              *(scalar.data_handle()),
                              in.size(),
-                             handle.get_stream(),
+                             resource::get_cuda_stream(handle),
                              setzero,
                              thres);
 }
@@ -70,7 +71,7 @@ void reciprocal(raft::device_resources const& handle,
  * @{
  */
 template <typename math_t, typename idx_t, typename layout>
-void reciprocal(raft::device_resources const& handle,
+void reciprocal(raft::resources const& handle,
                 raft::device_matrix_view<math_t, idx_t, layout> inout,
                 raft::host_scalar_view<math_t> scalar,
                 bool setzero = false,
@@ -79,7 +80,7 @@ void reciprocal(raft::device_resources const& handle,
   detail::reciprocal<math_t>(inout.data_handle(),
                              *(scalar.data_handle()),
                              inout.size(),
-                             handle.get_stream(),
+                             resource::get_cuda_stream(handle),
                              setzero,
                              thres);
 }
diff --git a/cpp/include/raft/matrix/reverse.cuh b/cpp/include/raft/matrix/reverse.cuh
index 3aaec56fee..42057bb0f5 100644
--- a/cpp/include/raft/matrix/reverse.cuh
+++ b/cpp/include/raft/matrix/reverse.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 #include <raft/util/input_validation.hpp>
 
@@ -34,14 +35,16 @@ namespace raft::matrix {
  * @param[inout] inout: input and output matrix
  */
 template <typename m_t, typename idx_t, typename layout_t>
-void col_reverse(raft::device_resources const& handle,
+void col_reverse(raft::resources const& handle,
                  raft::device_matrix_view<m_t, idx_t, layout_t> inout)
 {
   RAFT_EXPECTS(raft::is_row_or_column_major(inout), "Unsupported matrix layout");
   if (raft::is_col_major(inout)) {
-    detail::colReverse(inout.data_handle(), inout.extent(0), inout.extent(1), handle.get_stream());
+    detail::colReverse(
+      inout.data_handle(), inout.extent(0), inout.extent(1), resource::get_cuda_stream(handle));
   } else {
-    detail::rowReverse(inout.data_handle(), inout.extent(1), inout.extent(0), handle.get_stream());
+    detail::rowReverse(
+      inout.data_handle(), inout.extent(1), inout.extent(0), resource::get_cuda_stream(handle));
   }
 }
 
@@ -52,14 +55,16 @@ void col_reverse(raft::device_resources const& handle,
  * @param[inout] inout: input and output matrix
  */
 template <typename m_t, typename idx_t, typename layout_t>
-void row_reverse(raft::device_resources const& handle,
+void row_reverse(raft::resources const& handle,
                  raft::device_matrix_view<m_t, idx_t, layout_t> inout)
 {
   RAFT_EXPECTS(raft::is_row_or_column_major(inout), "Unsupported matrix layout");
   if (raft::is_col_major(inout)) {
-    detail::rowReverse(inout.data_handle(), inout.extent(0), inout.extent(1), handle.get_stream());
+    detail::rowReverse(
+      inout.data_handle(), inout.extent(0), inout.extent(1), resource::get_cuda_stream(handle));
   } else {
-    detail::colReverse(inout.data_handle(), inout.extent(1), inout.extent(0), handle.get_stream());
+    detail::colReverse(
+      inout.data_handle(), inout.extent(1), inout.extent(0), resource::get_cuda_stream(handle));
   }
 }
 /** @} */  // end group matrix_reverse
diff --git a/cpp/include/raft/matrix/select_k.cuh b/cpp/include/raft/matrix/select_k.cuh
index 7951cbdb03..8e6dbaafa8 100644
--- a/cpp/include/raft/matrix/select_k.cuh
+++ b/cpp/include/raft/matrix/select_k.cuh
@@ -17,10 +17,11 @@
 #pragma once
 
 #include "detail/select_k.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/nvtx.hpp>
+#include <raft/core/resources.hpp>
 
 #include <optional>
 
@@ -75,7 +76,7 @@ namespace raft::matrix {
  *   whether to select k smallest (true) or largest (false) keys.
  */
 template <typename T, typename IdxT>
-void select_k(const device_resources& handle,
+void select_k(const resources& handle,
               raft::device_matrix_view<const T, int64_t, row_major> in_val,
               std::optional<raft::device_matrix_view<const IdxT, int64_t, row_major>> in_idx,
               raft::device_matrix_view<T, int64_t, row_major> out_val,
@@ -102,7 +103,7 @@ void select_k(const device_resources& handle,
                                    out_val.data_handle(),
                                    out_idx.data_handle(),
                                    select_min,
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end of group select_k
diff --git a/cpp/include/raft/matrix/sign_flip.cuh b/cpp/include/raft/matrix/sign_flip.cuh
index 93962fb67d..6a90ae2d2f 100644
--- a/cpp/include/raft/matrix/sign_flip.cuh
+++ b/cpp/include/raft/matrix/sign_flip.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 
 namespace raft::matrix {
@@ -35,10 +36,11 @@ namespace raft::matrix {
  * @param[inout] inout: input matrix. Result also stored in this parameter
  */
 template <typename math_t, typename idx_t>
-void sign_flip(raft::device_resources const& handle,
+void sign_flip(raft::resources const& handle,
                raft::device_matrix_view<math_t, idx_t, col_major> inout)
 {
-  detail::signFlip(inout.data_handle(), inout.extent(0), inout.extent(1), handle.get_stream());
+  detail::signFlip(
+    inout.data_handle(), inout.extent(0), inout.extent(1), resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group matrix_sign_flip
diff --git a/cpp/include/raft/matrix/slice.cuh b/cpp/include/raft/matrix/slice.cuh
index 071a10a847..b739f1c732 100644
--- a/cpp/include/raft/matrix/slice.cuh
+++ b/cpp/include/raft/matrix/slice.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 
 namespace raft::matrix {
@@ -50,7 +51,7 @@ struct slice_coordinates {
  * example: Slice the 2nd and 3rd columns of a 4x3 matrix: slice(handle, in, out, {0, 1, 4, 3});
  */
 template <typename m_t, typename idx_t>
-void slice(raft::device_resources const& handle,
+void slice(raft::resources const& handle,
            raft::device_matrix_view<const m_t, idx_t, col_major> in,
            raft::device_matrix_view<m_t, idx_t, col_major> out,
            slice_coordinates<idx_t> coords)
@@ -71,7 +72,7 @@ void slice(raft::device_resources const& handle,
                       coords.col1,
                       coords.row2,
                       coords.col2,
-                      handle.get_stream());
+                      resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group matrix_slice
diff --git a/cpp/include/raft/matrix/sqrt.cuh b/cpp/include/raft/matrix/sqrt.cuh
index 309ae3452f..389ba28033 100644
--- a/cpp/include/raft/matrix/sqrt.cuh
+++ b/cpp/include/raft/matrix/sqrt.cuh
@@ -18,6 +18,7 @@
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 
 namespace raft::matrix {
@@ -37,12 +38,13 @@ namespace raft::matrix {
  * @param[out] out: output matrix. The result is stored in the out matrix
  */
 template <typename math_t, typename idx_t, typename layout>
-void sqrt(raft::device_resources const& handle,
+void sqrt(raft::resources const& handle,
           raft::device_matrix_view<const math_t, idx_t, layout> in,
           raft::device_matrix_view<math_t, idx_t, layout> out)
 {
   RAFT_EXPECTS(in.size() == out.size(), "Input and output matrices must have same size.");
-  detail::seqRoot(in.data_handle(), out.data_handle(), in.size(), handle.get_stream());
+  detail::seqRoot(
+    in.data_handle(), out.data_handle(), in.size(), resource::get_cuda_stream(handle));
 }
 
 /**
@@ -54,10 +56,9 @@ void sqrt(raft::device_resources const& handle,
  * @param[inout] inout: input matrix with in-place results
  */
 template <typename math_t, typename idx_t, typename layout>
-void sqrt(raft::device_resources const& handle,
-          raft::device_matrix_view<math_t, idx_t, layout> inout)
+void sqrt(raft::resources const& handle, raft::device_matrix_view<math_t, idx_t, layout> inout)
 {
-  detail::seqRoot(inout.data_handle(), inout.size(), handle.get_stream());
+  detail::seqRoot(inout.data_handle(), inout.size(), resource::get_cuda_stream(handle));
 }
 
 /**
@@ -72,7 +73,7 @@ void sqrt(raft::device_resources const& handle,
  * @param[in] set_neg_zero whether to set negative numbers to zero
  */
 template <typename math_t, typename idx_t, typename layout>
-void weighted_sqrt(raft::device_resources const& handle,
+void weighted_sqrt(raft::resources const& handle,
                    raft::device_matrix_view<const math_t, idx_t, layout> in,
                    raft::device_matrix_view<math_t, idx_t, layout> out,
                    raft::host_scalar_view<math_t> scalar,
@@ -83,7 +84,7 @@ void weighted_sqrt(raft::device_resources const& handle,
                   out.data_handle(),
                   *(scalar.data_handle()),
                   in.size(),
-                  handle.get_stream(),
+                  resource::get_cuda_stream(handle),
                   set_neg_zero);
 }
 
@@ -98,13 +99,16 @@ void weighted_sqrt(raft::device_resources const& handle,
  * @param[in] set_neg_zero whether to set negative numbers to zero
  */
 template <typename math_t, typename idx_t, typename layout>
-void weighted_sqrt(raft::device_resources const& handle,
+void weighted_sqrt(raft::resources const& handle,
                    raft::device_matrix_view<math_t, idx_t, layout> inout,
                    raft::host_scalar_view<math_t> scalar,
                    bool set_neg_zero = false)
 {
-  detail::seqRoot(
-    inout.data_handle(), *(scalar.data_handle()), inout.size(), handle.get_stream(), set_neg_zero);
+  detail::seqRoot(inout.data_handle(),
+                  *(scalar.data_handle()),
+                  inout.size(),
+                  resource::get_cuda_stream(handle),
+                  set_neg_zero);
 }
 
 /** @} */  // end group matrix_sqrt
diff --git a/cpp/include/raft/matrix/threshold.cuh b/cpp/include/raft/matrix/threshold.cuh
index 7dfb264d34..d137270374 100644
--- a/cpp/include/raft/matrix/threshold.cuh
+++ b/cpp/include/raft/matrix/threshold.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/detail/matrix.cuh>
 
 namespace raft::matrix {
@@ -37,14 +38,14 @@ namespace raft::matrix {
  * @param[in] thres threshold to set values to zero
  */
 template <typename math_t, typename idx_t, typename layout>
-void zero_small_values(raft::device_resources const& handle,
+void zero_small_values(raft::resources const& handle,
                        raft::device_matrix_view<const math_t, idx_t, layout> in,
                        raft::device_matrix_view<math_t, idx_t, layout> out,
                        math_t thres = 1e-15)
 {
   RAFT_EXPECTS(in.size() == out.size(), "Input and output matrices must have same size");
   detail::setSmallValuesZero(
-    out.data_handle(), in.data_handle(), in.size(), handle.get_stream(), thres);
+    out.data_handle(), in.data_handle(), in.size(), resource::get_cuda_stream(handle), thres);
 }
 
 /**
@@ -57,11 +58,12 @@ void zero_small_values(raft::device_resources const& handle,
  * @param thres: threshold
  */
 template <typename math_t, typename idx_t, typename layout>
-void zero_small_values(raft::device_resources const& handle,
+void zero_small_values(raft::resources const& handle,
                        raft::device_matrix_view<math_t, idx_t, layout> inout,
                        math_t thres = 1e-15)
 {
-  detail::setSmallValuesZero(inout.data_handle(), inout.size(), handle.get_stream(), thres);
+  detail::setSmallValuesZero(
+    inout.data_handle(), inout.size(), resource::get_cuda_stream(handle), thres);
 }
 
 /** @} */  // end group matrix_threshold
diff --git a/cpp/include/raft/neighbors/ball_cover-ext.cuh b/cpp/include/raft/neighbors/ball_cover-ext.cuh
index b6ab12d8e1..bc5fe934ab 100644
--- a/cpp/include/raft/neighbors/ball_cover-ext.cuh
+++ b/cpp/include/raft/neighbors/ball_cover-ext.cuh
@@ -25,11 +25,11 @@
 namespace raft::neighbors::ball_cover {
 
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void build_index(raft::device_resources const& handle,
+void build_index(raft::resources const& handle,
                  BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index) RAFT_EXPLICIT;
 
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void all_knn_query(raft::device_resources const& handle,
+void all_knn_query(raft::resources const& handle,
                    BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
                    int_t k,
                    idx_t* inds,
@@ -38,7 +38,7 @@ void all_knn_query(raft::device_resources const& handle,
                    float weight                = 1.0) RAFT_EXPLICIT;
 
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void all_knn_query(raft::device_resources const& handle,
+void all_knn_query(raft::resources const& handle,
                    BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
                    raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
                    raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
@@ -47,7 +47,7 @@ void all_knn_query(raft::device_resources const& handle,
                    float weight                = 1.0) RAFT_EXPLICIT;
 
 template <typename idx_t, typename value_t, typename int_t>
-void knn_query(raft::device_resources const& handle,
+void knn_query(raft::resources const& handle,
                const BallCoverIndex<idx_t, value_t, int_t>& index,
                int_t k,
                const value_t* query,
@@ -58,7 +58,7 @@ void knn_query(raft::device_resources const& handle,
                float weight                = 1.0) RAFT_EXPLICIT;
 
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void knn_query(raft::device_resources const& handle,
+void knn_query(raft::resources const& handle,
                const BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
                raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,
                raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
@@ -74,12 +74,12 @@ void knn_query(raft::device_resources const& handle,
 #define instantiate_raft_neighbors_ball_cover(idx_t, value_t, int_t, matrix_idx_t)                 \
   extern template void                                                                             \
   raft::neighbors::ball_cover::build_index<idx_t, value_t, int_t, matrix_idx_t>(                   \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index);      \
                                                                                                    \
   extern template void                                                                             \
   raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(                 \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
     int_t k,                                                                                       \
     idx_t* inds,                                                                                   \
@@ -89,7 +89,7 @@ void knn_query(raft::device_resources const& handle,
                                                                                                    \
   extern template void                                                                             \
   raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(                 \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
     raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
     raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,                              \
@@ -98,7 +98,7 @@ void knn_query(raft::device_resources const& handle,
     float weight);                                                                                 \
                                                                                                    \
   extern template void raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t>(              \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t>& index,               \
     int_t k,                                                                                       \
     const value_t* query,                                                                          \
@@ -110,7 +110,7 @@ void knn_query(raft::device_resources const& handle,
                                                                                                    \
   extern template void                                                                             \
   raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t, matrix_idx_t>(                     \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index, \
     raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,                        \
     raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
diff --git a/cpp/include/raft/neighbors/ball_cover-inl.cuh b/cpp/include/raft/neighbors/ball_cover-inl.cuh
index 619c57a35a..c41ecf6ca2 100644
--- a/cpp/include/raft/neighbors/ball_cover-inl.cuh
+++ b/cpp/include/raft/neighbors/ball_cover-inl.cuh
@@ -39,12 +39,12 @@ namespace raft::neighbors::ball_cover {
  * Usage example:
  * @code{.cpp}
  *
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/neighbors/ball_cover.cuh>
  *  #include <raft/distance/distance_types.hpp>
  *  using namespace raft::neighbors;
  *
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  auto metric = raft::distance::DistanceType::L2Expanded;
  *  BallCoverIndex index(handle, X, metric);
@@ -60,7 +60,7 @@ namespace raft::neighbors::ball_cover {
  * @param[inout] index an empty (and not previous built) instance of BallCoverIndex
  */
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void build_index(raft::device_resources const& handle,
+void build_index(raft::resources const& handle,
                  BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index)
 {
   ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
@@ -109,7 +109,7 @@ void build_index(raft::device_resources const& handle,
  *               looking in the closest landmark.
  */
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void all_knn_query(raft::device_resources const& handle,
+void all_knn_query(raft::resources const& handle,
                    BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
                    int_t k,
                    idx_t* inds,
@@ -163,12 +163,12 @@ void all_knn_query(raft::device_resources const& handle,
  * Usage example:
  * @code{.cpp}
  *
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/neighbors/ball_cover.cuh>
  *  #include <raft/distance/distance_types.hpp>
  *  using namespace raft::neighbors;
  *
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  auto metric = raft::distance::DistanceType::L2Expanded;
  *
@@ -202,7 +202,7 @@ void all_knn_query(raft::device_resources const& handle,
  *               looking in the closest landmark.
  */
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void all_knn_query(raft::device_resources const& handle,
+void all_knn_query(raft::resources const& handle,
                    BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
                    raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
                    raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,
@@ -256,7 +256,7 @@ void all_knn_query(raft::device_resources const& handle,
  * @param[in] n_query_pts number of query points
  */
 template <typename idx_t, typename value_t, typename int_t>
-void knn_query(raft::device_resources const& handle,
+void knn_query(raft::resources const& handle,
                const BallCoverIndex<idx_t, value_t, int_t>& index,
                int_t k,
                const value_t* query,
@@ -311,12 +311,12 @@ void knn_query(raft::device_resources const& handle,
  * Usage example:
  * @code{.cpp}
  *
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/neighbors/ball_cover.cuh>
  *  #include <raft/distance/distance_types.hpp>
  *  using namespace raft::neighbors;
  *
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  auto metric = raft::distance::DistanceType::L2Expanded;
  *
@@ -352,7 +352,7 @@ void knn_query(raft::device_resources const& handle,
  *               looking in the closest landmark.
  */
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void knn_query(raft::device_resources const& handle,
+void knn_query(raft::resources const& handle,
                const BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
                raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,
                raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,
diff --git a/cpp/include/raft/neighbors/ball_cover_types.hpp b/cpp/include/raft/neighbors/ball_cover_types.hpp
index 8cab1469fc..0a6ad8c407 100644
--- a/cpp/include/raft/neighbors/ball_cover_types.hpp
+++ b/cpp/include/raft/neighbors/ball_cover_types.hpp
@@ -19,7 +19,7 @@
 #include <cstdint>
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -45,7 +45,7 @@ template <typename value_idx,
           typename matrix_idx = std::uint32_t>
 class BallCoverIndex {
  public:
-  explicit BallCoverIndex(raft::device_resources const& handle_,
+  explicit BallCoverIndex(raft::resources const& handle_,
                           const value_t* X_,
                           value_int m_,
                           value_int n_,
@@ -71,7 +71,7 @@ class BallCoverIndex {
   {
   }
 
-  explicit BallCoverIndex(raft::device_resources const& handle_,
+  explicit BallCoverIndex(raft::resources const& handle_,
                           raft::device_matrix_view<const value_t, matrix_idx, row_major> X_,
                           raft::distance::DistanceType metric_)
     : handle(handle_),
@@ -139,7 +139,7 @@ class BallCoverIndex {
   // This should only be set by internal functions
   void set_index_trained() { index_trained = true; }
 
-  raft::device_resources const& handle;
+  raft::resources const& handle;
 
   value_int m;
   value_int n;
diff --git a/cpp/include/raft/neighbors/brute_force-ext.cuh b/cpp/include/raft/neighbors/brute_force-ext.cuh
index 98a186db86..862db75866 100644
--- a/cpp/include/raft/neighbors/brute_force-ext.cuh
+++ b/cpp/include/raft/neighbors/brute_force-ext.cuh
@@ -16,9 +16,11 @@
 
 #pragma once
 
+#include <optional>
+
 #include <raft/core/device_mdspan.hpp>       // raft::device_matrix_view
-#include <raft/core/device_resources.hpp>    // raft::device_resources
 #include <raft/core/operators.hpp>           // raft::identity_op
+#include <raft/core/resources.hpp>           // raft::resources
 #include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
 #include <raft/util/raft_explicit.hpp>       // RAFT_EXPLICIT
 
@@ -28,7 +30,7 @@ namespace raft::neighbors::brute_force {
 
 template <typename value_t, typename idx_t>
 inline void knn_merge_parts(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, row_major> in_keys,
   raft::device_matrix_view<const idx_t, idx_t, row_major> in_values,
   raft::device_matrix_view<value_t, idx_t, row_major> out_keys,
@@ -42,7 +44,7 @@ template <typename idx_t,
           typename index_layout,
           typename search_layout,
           typename epilogue_op = raft::identity_op>
-void knn(raft::device_resources const& handle,
+void knn(raft::resources const& handle,
          std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index,
          raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,
          raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
@@ -53,7 +55,7 @@ void knn(raft::device_resources const& handle,
          epilogue_op distance_epilogue         = raft::identity_op()) RAFT_EXPLICIT;
 
 template <typename value_t, typename idx_t, typename idx_layout, typename query_layout>
-void fused_l2_knn(raft::device_resources const& handle,
+void fused_l2_knn(raft::resources const& handle,
                   raft::device_matrix_view<const value_t, idx_t, idx_layout> index,
                   raft::device_matrix_view<const value_t, idx_t, query_layout> query,
                   raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,
@@ -70,7 +72,7 @@ void fused_l2_knn(raft::device_resources const& handle,
   idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
   extern template void raft::neighbors::brute_force::                                       \
     knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
-      raft::device_resources const& handle,                                                 \
+      raft::resources const& handle,                                                        \
       std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
       raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
       raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
@@ -94,7 +96,7 @@ instantiate_raft_neighbors_brute_force_knn(
 #define instantiate_raft_neighbors_brute_force_fused_l2_knn(            \
   value_t, idx_t, idx_layout, query_layout)                             \
   extern template void raft::neighbors::brute_force::fused_l2_knn(      \
-    raft::device_resources const& handle,                               \
+    raft::resources const& handle,                                      \
     raft::device_matrix_view<const value_t, idx_t, idx_layout> index,   \
     raft::device_matrix_view<const value_t, idx_t, query_layout> query, \
     raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,         \
diff --git a/cpp/include/raft/neighbors/brute_force-inl.cuh b/cpp/include/raft/neighbors/brute_force-inl.cuh
index dac1a29c7f..b4de76037a 100644
--- a/cpp/include/raft/neighbors/brute_force-inl.cuh
+++ b/cpp/include/raft/neighbors/brute_force-inl.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/detail/knn_brute_force.cuh>
 #include <raft/spatial/knn/detail/fused_l2_knn.cuh>
@@ -53,11 +54,11 @@ namespace raft::neighbors::brute_force {
  *
  * Usage example:
  * @code{.cpp}
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/neighbors/brute_force.cuh>
  *  using namespace raft::neighbors;
  *
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  compute multiple knn graphs and aggregate row-wise
  *  (see detailed description above)
@@ -78,7 +79,7 @@ namespace raft::neighbors::brute_force {
  */
 template <typename value_t, typename idx_t>
 inline void knn_merge_parts(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, row_major> in_keys,
   raft::device_matrix_view<const idx_t, idx_t, row_major> in_values,
   raft::device_matrix_view<value_t, idx_t, row_major> out_keys,
@@ -102,7 +103,7 @@ inline void knn_merge_parts(
                           n_samples,
                           n_parts,
                           in_keys.extent(1),
-                          handle.get_stream(),
+                          resource::get_cuda_stream(handle),
                           translations.value_or(nullptr));
 }
 
@@ -115,12 +116,12 @@ inline void knn_merge_parts(
  *
  * Usage example:
  * @code{.cpp}
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/neighbors/brute_force.cuh>
  *  #include <raft/distance/distance_types.hpp>
  *  using namespace raft::neighbors;
  *
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
  *  brute_force::knn(handle, index, search, indices, distances, metric);
@@ -147,7 +148,7 @@ template <typename idx_t,
           typename index_layout,
           typename search_layout,
           typename epilogue_op = raft::identity_op>
-void knn(raft::device_resources const& handle,
+void knn(raft::resources const& handle,
          std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index,
          raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,
          raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,
@@ -208,12 +209,12 @@ void knn(raft::device_resources const& handle,
  *
  * Usage example:
  * @code{.cpp}
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/neighbors/brute_force.cuh>
  *  #include <raft/distance/distance_types.hpp>
  *  using namespace raft::neighbors;
  *
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  auto metric = raft::distance::DistanceType::L2SqrtExpanded;
  *  brute_force::fused_l2_knn(handle, index, search, indices, distances, metric);
@@ -231,7 +232,7 @@ void knn(raft::device_resources const& handle,
  * @param[in] metric type of distance computation to perform (must be a variant of L2)
  */
 template <typename value_t, typename idx_t, typename idx_layout, typename query_layout>
-void fused_l2_knn(raft::device_resources const& handle,
+void fused_l2_knn(raft::resources const& handle,
                   raft::device_matrix_view<const value_t, idx_t, idx_layout> index,
                   raft::device_matrix_view<const value_t, idx_t, query_layout> query,
                   raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,
@@ -271,7 +272,7 @@ void fused_l2_knn(raft::device_resources const& handle,
                                          k,
                                          rowMajorIndex,
                                          rowMajorQuery,
-                                         handle.get_stream(),
+                                         resource::get_cuda_stream(handle),
                                          metric);
 }
 
diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh
index 87d370b54a..19f65baf1a 100644
--- a/cpp/include/raft/neighbors/cagra.cuh
+++ b/cpp/include/raft/neighbors/cagra.cuh
@@ -21,9 +21,9 @@
 #include "detail/cagra/graph_core.cuh"
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_device_accessor.hpp>
 #include <raft/core/mdspan.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/neighbors/cagra_types.hpp>
 #include <rmm/cuda_stream_view.hpp>
 
@@ -74,7 +74,7 @@ namespace raft::neighbors::experimental::cagra {
  * @param[in] search_params (optional) ivf_pq search parameters
  */
 template <typename DataT, typename IdxT, typename accessor>
-void build_knn_graph(raft::device_resources const& res,
+void build_knn_graph(raft::resources const& res,
                      mdspan<const DataT, matrix_extent<IdxT>, row_major, accessor> dataset,
                      raft::host_matrix_view<IdxT, IdxT, row_major> knn_graph,
                      std::optional<float> refine_rate                   = std::nullopt,
@@ -120,7 +120,7 @@ template <typename DataT,
             host_device_accessor<std::experimental::default_accessor<DataT>, memory_type::device>,
           typename g_accessor =
             host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
-void sort_knn_graph(raft::device_resources const& res,
+void sort_knn_graph(raft::resources const& res,
                     mdspan<const DataT, matrix_extent<IdxT>, row_major, d_accessor> dataset,
                     mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph)
 {
@@ -144,7 +144,7 @@ void sort_knn_graph(raft::device_resources const& res,
 template <typename IdxT = uint32_t,
           typename g_accessor =
             host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
-void prune(raft::device_resources const& res,
+void prune(raft::resources const& res,
            mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph,
            raft::host_matrix_view<IdxT, IdxT, row_major> new_graph)
 {
@@ -195,7 +195,7 @@ template <typename T,
           typename IdxT = uint32_t,
           typename Accessor =
             host_device_accessor<std::experimental::default_accessor<T>, memory_type::host>>
-index<T, IdxT> build(raft::device_resources const& res,
+index<T, IdxT> build(raft::resources const& res,
                      const index_params& params,
                      mdspan<const T, matrix_extent<IdxT>, row_major, Accessor> dataset)
 {
@@ -239,7 +239,7 @@ index<T, IdxT> build(raft::device_resources const& res,
  * k]
  */
 template <typename T, typename IdxT>
-void search(raft::device_resources const& res,
+void search(raft::resources const& res,
             const search_params& params,
             const index<T, IdxT>& idx,
             raft::device_matrix_view<const T, IdxT, row_major> queries,
diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh
index befd5e9c07..8d1771a301 100644
--- a/cpp/include/raft/neighbors/cagra_serialize.cuh
+++ b/cpp/include/raft/neighbors/cagra_serialize.cuh
@@ -31,9 +31,9 @@ namespace raft::neighbors::experimental::cagra {
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create an output stream
  * std::ostream os(std::cout.rdbuf());
@@ -50,7 +50,7 @@ namespace raft::neighbors::experimental::cagra {
  *
  */
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& handle, std::ostream& os, const index<T, IdxT>& index)
+void serialize(raft::resources const& handle, std::ostream& os, const index<T, IdxT>& index)
 {
   detail::serialize(handle, os, index);
 }
@@ -61,9 +61,9 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create a string with a filepath
  * std::string filename("/path/to/index");
@@ -80,7 +80,7 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
  *
  */
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& handle,
+void serialize(raft::resources const& handle,
                const std::string& filename,
                const index<T, IdxT>& index)
 {
@@ -93,9 +93,9 @@ void serialize(raft::device_resources const& handle,
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create an input stream
  * std::istream is(std::cin.rdbuf());
@@ -113,7 +113,7 @@ void serialize(raft::device_resources const& handle,
  * @return raft::neighbors::cagra::index<T, IdxT>
  */
 template <typename T, typename IdxT>
-index<T, IdxT> deserialize(raft::device_resources const& handle, std::istream& is)
+index<T, IdxT> deserialize(raft::resources const& handle, std::istream& is)
 {
   return detail::deserialize<T, IdxT>(handle, is);
 }
@@ -124,9 +124,9 @@ index<T, IdxT> deserialize(raft::device_resources const& handle, std::istream& i
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create a string with a filepath
  * std::string filename("/path/to/index");
@@ -144,7 +144,7 @@ index<T, IdxT> deserialize(raft::device_resources const& handle, std::istream& i
  * @return raft::neighbors::cagra::index<T, IdxT>
  */
 template <typename T, typename IdxT>
-index<T, IdxT> deserialize(raft::device_resources const& handle, const std::string& filename)
+index<T, IdxT> deserialize(raft::resources const& handle, const std::string& filename)
 {
   return detail::deserialize<T, IdxT>(handle, filename);
 }
diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp
index 931fb3f23f..87405ae9fb 100644
--- a/cpp/include/raft/neighbors/cagra_types.hpp
+++ b/cpp/include/raft/neighbors/cagra_types.hpp
@@ -17,12 +17,13 @@
 #pragma once
 
 #include "ann_types.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/error.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/mdspan_types.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/util/integer_utils.hpp>
 
@@ -162,7 +163,7 @@ struct index : ann::index {
   ~index()                               = default;
 
   /** Construct an empty index. */
-  index(raft::device_resources const& res)
+  index(raft::resources const& res)
     : ann::index(),
       metric_(raft::distance::DistanceType::L2Expanded),
       dataset_(make_device_matrix<T, IdxT>(res, 0, 0)),
@@ -172,7 +173,7 @@ struct index : ann::index {
 
   /** Construct an index from dataset and knn_graph arrays */
   template <typename data_accessor, typename graph_accessor>
-  index(raft::device_resources const& res,
+  index(raft::resources const& res,
         raft::distance::DistanceType metric,
         mdspan<const T, matrix_extent<IdxT>, row_major, data_accessor> dataset,
         mdspan<IdxT, matrix_extent<IdxT>, row_major, graph_accessor> knn_graph)
@@ -183,9 +184,15 @@ struct index : ann::index {
   {
     RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0),
                  "Dataset and knn_graph must have equal number of rows");
-    raft::copy(dataset_.data_handle(), dataset.data_handle(), dataset.size(), res.get_stream());
-    raft::copy(graph_.data_handle(), knn_graph.data_handle(), knn_graph.size(), res.get_stream());
-    res.sync_stream();
+    raft::copy(dataset_.data_handle(),
+               dataset.data_handle(),
+               dataset.size(),
+               resource::get_cuda_stream(res));
+    raft::copy(graph_.data_handle(),
+               knn_graph.data_handle(),
+               knn_graph.size(),
+               resource::get_cuda_stream(res));
+    resource::sync_stream(res);
   }
 
  private:
diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
index 54c806ba13..f0eeb2b36c 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
@@ -19,6 +19,7 @@
 #include "graph_core.cuh"
 #include <chrono>
 #include <cstdio>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <vector>
 
 #include <raft/core/device_mdarray.hpp>
@@ -40,7 +41,7 @@ namespace raft::neighbors::experimental::cagra::detail {
 using INDEX_T = std::uint32_t;
 
 template <typename DataT, typename IdxT, typename accessor>
-void build_knn_graph(raft::device_resources const& res,
+void build_knn_graph(raft::resources const& res,
                      mdspan<const DataT, matrix_extent<IdxT>, row_major, accessor> dataset,
                      raft::host_matrix_view<IdxT, IdxT, row_major> knn_graph,
                      std::optional<float> refine_rate                   = std::nullopt,
@@ -132,12 +133,13 @@ void build_knn_graph(raft::device_resources const& res,
   auto pool_guard = raft::get_pool_memory_resource(device_memory, 1024 * 1024);
   if (pool_guard) { RAFT_LOG_DEBUG("ivf_pq using pool memory resource"); }
 
-  raft::spatial::knn::detail::utils::batch_load_iterator<DataT> vec_batches(dataset.data_handle(),
-                                                                            dataset.extent(0),
-                                                                            dataset.extent(1),
-                                                                            max_batch_size,
-                                                                            res.get_stream(),
-                                                                            device_memory);
+  raft::spatial::knn::detail::utils::batch_load_iterator<DataT> vec_batches(
+    dataset.data_handle(),
+    dataset.extent(0),
+    dataset.extent(1),
+    max_batch_size,
+    resource::get_cuda_stream(res),
+    device_memory);
 
   for (const auto& batch : vec_batches) {
     auto queries_view = raft::make_device_matrix_view<const DataT, int64_t>(
@@ -153,8 +155,11 @@ void build_knn_graph(raft::device_resources const& res,
       raft::copy(neighbors_host.data_handle(),
                  neighbors.data_handle(),
                  neighbors_view.size(),
-                 res.get_stream());
-      raft::copy(queries_host.data_handle(), batch.data(), queries_view.size(), res.get_stream());
+                 resource::get_cuda_stream(res));
+      raft::copy(queries_host.data_handle(),
+                 batch.data(),
+                 queries_view.size(),
+                 resource::get_cuda_stream(res));
       auto queries_host_view = make_host_matrix_view<const DataT, int64_t>(
         queries_host.data_handle(), batch.size(), batch.row_width());
       auto neighbors_host_view = make_host_matrix_view<const int64_t, int64_t>(
@@ -163,7 +168,7 @@ void build_knn_graph(raft::device_resources const& res,
         refined_neighbors_host.data_handle(), batch.size(), top_k);
       auto refined_distances_host_view = make_host_matrix_view<float, int64_t>(
         refined_distances_host.data_handle(), batch.size(), top_k);
-      res.sync_stream();
+      resource::sync_stream(res);
 
       raft::neighbors::detail::refine_host<int64_t, DataT, float, int64_t>(  // res,
         dataset,
@@ -193,8 +198,8 @@ void build_knn_graph(raft::device_resources const& res,
       raft::copy(refined_neighbors_host.data_handle(),
                  refined_neighbors_view.data_handle(),
                  refined_neighbors_view.size(),
-                 res.get_stream());
-      res.sync_stream();
+                 resource::get_cuda_stream(res));
+      resource::sync_stream(res);
     }
     // omit itself & write out
     // TODO(tfeher): do this in parallel with GPU processing of next batch
diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
index 5902d1405f..0073f66d0b 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
@@ -16,11 +16,13 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/neighbors/detail/ivf_pq_search.cuh>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/host_mdspan.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/neighbors/cagra_types.hpp>
 #include <rmm/cuda_stream_view.hpp>
 
@@ -51,7 +53,7 @@ namespace raft::neighbors::experimental::cagra::detail {
  */
 
 template <typename T, typename IdxT = uint32_t, typename DistanceT = float>
-void search_main(raft::device_resources const& res,
+void search_main(raft::resources const& res,
                  search_params params,
                  const index<T, IdxT>& index,
                  raft::device_matrix_view<const T, IdxT, row_major> queries,
@@ -112,7 +114,7 @@ void search_main(raft::device_resources const& res,
                                         distances.extent(0),
                                         distances.extent(1),
                                         kScale,
-                                        res.get_stream());
+                                        resource::get_cuda_stream(res));
 }
 /** @} */  // end group cagra
 
diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh
index 171f261cf3..04d0bb350f 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh
@@ -49,7 +49,7 @@ template struct check_index_layout<sizeof(index<double, std::uint64_t>), 136>;
  *
  */
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& res, std::ostream& os, const index<T, IdxT>& index_)
+void serialize(raft::resources const& res, std::ostream& os, const index<T, IdxT>& index_)
 {
   RAFT_LOG_DEBUG(
     "Saving CAGRA index, size %zu, dim %u", static_cast<size_t>(index_.size()), index_.dim());
@@ -64,7 +64,7 @@ void serialize(raft::device_resources const& res, std::ostream& os, const index<
 }
 
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& res,
+void serialize(raft::resources const& res,
                const std::string& filename,
                const index<T, IdxT>& index_)
 {
@@ -87,7 +87,7 @@ void serialize(raft::device_resources const& res,
  *
  */
 template <typename T, typename IdxT>
-auto deserialize(raft::device_resources const& res, std::istream& is) -> index<T, IdxT>
+auto deserialize(raft::resources const& res, std::istream& is) -> index<T, IdxT>
 {
   auto ver = deserialize_scalar<int>(res, is);
   if (ver != serialization_version) {
@@ -108,7 +108,7 @@ auto deserialize(raft::device_resources const& res, std::istream& is) -> index<T
 }
 
 template <typename T, typename IdxT>
-auto deserialize(raft::device_resources const& res, const std::string& filename) -> index<T, IdxT>
+auto deserialize(raft::resources const& res, const std::string& filename) -> index<T, IdxT>
 {
   std::ifstream is(filename, std::ios::in | std::ios::binary);
 
diff --git a/cpp/include/raft/neighbors/detail/cagra/factory.cuh b/cpp/include/raft/neighbors/detail/cagra/factory.cuh
index beeebc605c..7d4cfee0b9 100644
--- a/cpp/include/raft/neighbors/detail/cagra/factory.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/factory.cuh
@@ -29,12 +29,11 @@ class factory {
   /**
    * Create a search structure for dataset with dim features.
    */
-  static std::unique_ptr<search_plan_impl<T, IdxT, DistanceT>> create(
-    raft::device_resources const& res,
-    search_params const& params,
-    int64_t dim,
-    int64_t graph_degree,
-    uint32_t topk)
+  static std::unique_ptr<search_plan_impl<T, IdxT, DistanceT>> create(raft::resources const& res,
+                                                                      search_params const& params,
+                                                                      int64_t dim,
+                                                                      int64_t graph_degree,
+                                                                      uint32_t topk)
   {
     search_plan_impl_base plan(params, dim, graph_degree, topk);
     switch (plan.max_dim) {
@@ -70,7 +69,7 @@ class factory {
  private:
   template <unsigned MAX_DATASET_DIM, unsigned TEAM_SIZE>
   static std::unique_ptr<search_plan_impl<T, IdxT, DistanceT>> dispatch_kernel(
-    raft::device_resources const& res, search_plan_impl_base& plan)
+    raft::resources const& res, search_plan_impl_base& plan)
   {
     if (plan.algo == search_algo::SINGLE_CTA) {
       return std::unique_ptr<search_plan_impl<T, IdxT, DistanceT>>(
diff --git a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
index b7fffb4eaa..aa3f7dd29f 100644
--- a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
@@ -23,9 +23,10 @@
 #include <memory>
 #include <omp.h>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_device_accessor.hpp>
 #include <raft/core/mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 #include <random>
 #include <sys/time.h>
@@ -295,7 +296,7 @@ template <typename DataT,
             host_device_accessor<std::experimental::default_accessor<DataT>, memory_type::device>,
           typename g_accessor =
             host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
-void sort_knn_graph(raft::device_resources const& res,
+void sort_knn_graph(raft::resources const& res,
                     mdspan<const DataT, matrix_extent<IdxT>, row_major, d_accessor> dataset,
                     mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph)
 {
@@ -318,12 +319,15 @@ void sort_knn_graph(raft::device_resources const& res,
   RAFT_LOG_DEBUG("# Sorting kNN Graph on GPUs ");
 
   auto d_dataset = raft::make_device_matrix<DataT, IdxT>(res, dataset_size, dataset_dim);
-  raft::copy(d_dataset.data_handle(), dataset_ptr, dataset_size * dataset_dim, res.get_stream());
+  raft::copy(d_dataset.data_handle(),
+             dataset_ptr,
+             dataset_size * dataset_dim,
+             resource::get_cuda_stream(res));
 
   raft::copy(d_input_graph.data_handle(),
              input_graph_ptr,
              graph_size * input_graph_degree,
-             res.get_stream());
+             resource::get_cuda_stream(res));
 
   void (*kernel_sort)(
     const DataT* const, const IdxT, const uint32_t, IdxT* const, const uint32_t, const uint32_t);
@@ -355,18 +359,19 @@ void sort_knn_graph(raft::device_resources const& res,
   }
   dim3 blocks_sort(graph_size, 1, 1);
   RAFT_LOG_DEBUG(".");
-  kernel_sort<<<blocks_sort, threads_sort, 0, res.get_stream()>>>(d_dataset.data_handle(),
-                                                                  dataset_size,
-                                                                  dataset_dim,
-                                                                  d_input_graph.data_handle(),
-                                                                  graph_size,
-                                                                  input_graph_degree);
-  res.sync_stream();
+  kernel_sort<<<blocks_sort, threads_sort, 0, resource::get_cuda_stream(res)>>>(
+    d_dataset.data_handle(),
+    dataset_size,
+    dataset_dim,
+    d_input_graph.data_handle(),
+    graph_size,
+    input_graph_degree);
+  resource::sync_stream(res);
   RAFT_LOG_DEBUG(".");
   raft::copy(input_graph_ptr,
              d_input_graph.data_handle(),
              graph_size * input_graph_degree,
-             res.get_stream());
+             resource::get_cuda_stream(res));
   RAFT_LOG_DEBUG("\n");
 
   const double time_sort_end = cur_time();
@@ -376,7 +381,7 @@ void sort_knn_graph(raft::device_resources const& res,
 template <typename IdxT = uint32_t,
           typename g_accessor =
             host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
-void prune(raft::device_resources const& res,
+void prune(raft::resources const& res,
            mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph,
            raft::host_matrix_view<IdxT, IdxT, row_major> new_graph)
 {
@@ -407,11 +412,13 @@ void prune(raft::device_resources const& res,
     RAFT_CUDA_TRY(cudaMemsetAsync(d_detour_count.data_handle(),
                                   0xff,
                                   graph_size * input_graph_degree * sizeof(uint8_t),
-                                  res.get_stream()));
+                                  resource::get_cuda_stream(res)));
 
     auto d_num_no_detour_edges = raft::make_device_vector<uint32_t, IdxT>(res, graph_size);
-    RAFT_CUDA_TRY(cudaMemsetAsync(
-      d_num_no_detour_edges.data_handle(), 0x00, graph_size * sizeof(uint32_t), res.get_stream()));
+    RAFT_CUDA_TRY(cudaMemsetAsync(d_num_no_detour_edges.data_handle(),
+                                  0x00,
+                                  graph_size * sizeof(uint32_t),
+                                  resource::get_cuda_stream(res)));
 
     auto dev_stats  = raft::make_device_vector<uint64_t>(res, 2);
     auto host_stats = raft::make_host_vector<uint64_t>(2);
@@ -435,7 +442,7 @@ void prune(raft::device_resources const& res,
     raft::copy(d_input_graph.data_handle(),
                input_graph_ptr,
                graph_size * input_graph_degree,
-               res.get_stream());
+               resource::get_cuda_stream(res));
     void (*kernel_prune)(const IdxT* const,
                          const uint32_t,
                          const uint32_t,
@@ -463,11 +470,11 @@ void prune(raft::device_resources const& res,
     const dim3 threads_prune(32, 1, 1);
     const dim3 blocks_prune(batch_size, 1, 1);
 
-    RAFT_CUDA_TRY(
-      cudaMemsetAsync(dev_stats.data_handle(), 0, sizeof(uint64_t) * 2, res.get_stream()));
+    RAFT_CUDA_TRY(cudaMemsetAsync(
+      dev_stats.data_handle(), 0, sizeof(uint64_t) * 2, resource::get_cuda_stream(res)));
 
     for (uint32_t i_batch = 0; i_batch < num_batch; i_batch++) {
-      kernel_prune<<<blocks_prune, threads_prune, 0, res.get_stream()>>>(
+      kernel_prune<<<blocks_prune, threads_prune, 0, resource::get_cuda_stream(res)>>>(
         d_input_graph.data_handle(),
         graph_size,
         input_graph_degree,
@@ -477,20 +484,21 @@ void prune(raft::device_resources const& res,
         d_detour_count.data_handle(),
         d_num_no_detour_edges.data_handle(),
         dev_stats.data_handle());
-      res.sync_stream();
+      resource::sync_stream(res);
       RAFT_LOG_DEBUG(
         "# Pruning kNN Graph on GPUs (%.1lf %%)\r",
         (double)std::min<IdxT>((i_batch + 1) * batch_size, graph_size) / graph_size * 100);
     }
-    res.sync_stream();
+    resource::sync_stream(res);
     RAFT_LOG_DEBUG("\n");
 
     raft::copy(detour_count.data_handle(),
                d_detour_count.data_handle(),
                graph_size * input_graph_degree,
-               res.get_stream());
+               resource::get_cuda_stream(res));
 
-    raft::copy(host_stats.data_handle(), dev_stats.data_handle(), 2, res.get_stream());
+    raft::copy(
+      host_stats.data_handle(), dev_stats.data_handle(), 2, resource::get_cuda_stream(res));
     const auto num_keep = host_stats.data_handle()[0];
     const auto num_full = host_stats.data_handle()[1];
 
@@ -538,11 +546,13 @@ void prune(raft::device_resources const& res,
     RAFT_CUDA_TRY(cudaMemsetAsync(d_rev_graph.data_handle(),
                                   0xff,
                                   graph_size * output_graph_degree * sizeof(IdxT),
-                                  res.get_stream()));
+                                  resource::get_cuda_stream(res)));
 
     auto d_rev_graph_count = raft::make_device_vector<uint32_t, IdxT>(res, graph_size);
-    RAFT_CUDA_TRY(cudaMemsetAsync(
-      d_rev_graph_count.data_handle(), 0x00, graph_size * sizeof(uint32_t), res.get_stream()));
+    RAFT_CUDA_TRY(cudaMemsetAsync(d_rev_graph_count.data_handle(),
+                                  0x00,
+                                  graph_size * sizeof(uint32_t),
+                                  resource::get_cuda_stream(res)));
 
     auto dest_nodes   = raft::make_host_vector<IdxT, IdxT>(graph_size);
     auto d_dest_nodes = raft::make_device_vector<IdxT, IdxT>(res, graph_size);
@@ -552,30 +562,35 @@ void prune(raft::device_resources const& res,
       for (uint64_t i = 0; i < graph_size; i++) {
         dest_nodes.data_handle()[i] = pruned_graph.data_handle()[k + (output_graph_degree * i)];
       }
-      res.sync_stream();
+      resource::sync_stream(res);
 
-      raft::copy(
-        d_dest_nodes.data_handle(), dest_nodes.data_handle(), graph_size, res.get_stream());
+      raft::copy(d_dest_nodes.data_handle(),
+                 dest_nodes.data_handle(),
+                 graph_size,
+                 resource::get_cuda_stream(res));
 
       dim3 threads(256, 1, 1);
       dim3 blocks(1024, 1, 1);
-      kern_make_rev_graph<<<blocks, threads, 0, res.get_stream()>>>(d_dest_nodes.data_handle(),
-                                                                    d_rev_graph.data_handle(),
-                                                                    d_rev_graph_count.data_handle(),
-                                                                    graph_size,
-                                                                    output_graph_degree);
+      kern_make_rev_graph<<<blocks, threads, 0, resource::get_cuda_stream(res)>>>(
+        d_dest_nodes.data_handle(),
+        d_rev_graph.data_handle(),
+        d_rev_graph_count.data_handle(),
+        graph_size,
+        output_graph_degree);
       RAFT_LOG_DEBUG("# Making reverse graph on GPUs: %lu / %u    \r", k, output_graph_degree);
     }
 
-    res.sync_stream();
+    resource::sync_stream(res);
     RAFT_LOG_DEBUG("\n");
 
     raft::copy(rev_graph.data_handle(),
                d_rev_graph.data_handle(),
                graph_size * output_graph_degree,
-               res.get_stream());
-    raft::copy(
-      rev_graph_count.data_handle(), d_rev_graph_count.data_handle(), graph_size, res.get_stream());
+               resource::get_cuda_stream(res));
+    raft::copy(rev_graph_count.data_handle(),
+               d_rev_graph_count.data_handle(),
+               graph_size,
+               resource::get_cuda_stream(res));
 
     const double time_make_end = cur_time();
     RAFT_LOG_DEBUG("# Making reverse graph time: %.1lf sec", time_make_end - time_make_start);
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
index 99553632ac..4cccc36a23 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
@@ -23,7 +23,9 @@
 #include <memory>
 #include <numeric>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_properties.hpp>
+#include <raft/core/resources.hpp>
 
 #include <vector>
 
@@ -459,21 +461,21 @@ struct search : public search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
   size_t topk_workspace_size;
   rmm::device_uvector<uint32_t> topk_workspace;
 
-  search(raft::device_resources const& res,
+  search(raft::resources const& res,
          search_params params,
          int64_t dim,
          int64_t graph_degree,
          uint32_t topk)
     : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T>(res, params, dim, graph_degree, topk),
-      intermediate_indices(0, res.get_stream()),
-      intermediate_distances(0, res.get_stream()),
-      topk_workspace(0, res.get_stream())
+      intermediate_indices(0, resource::get_cuda_stream(res)),
+      intermediate_distances(0, resource::get_cuda_stream(res)),
+      topk_workspace(0, resource::get_cuda_stream(res))
 
   {
     set_params(res);
   }
 
-  void set_params(raft::device_resources const& res)
+  void set_params(raft::resources const& res)
   {
     this->itopk_size   = 32;
     num_parents        = 1;
@@ -508,7 +510,7 @@ struct search : public search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
 
       // Increase block size to improve GPU occupancy when total number of
       // CTAs (= num_cta_per_query * max_queries) is small.
-      cudaDeviceProp deviceProp = res.get_device_properties();
+      cudaDeviceProp deviceProp = resource::get_device_properties(res);
       RAFT_LOG_DEBUG("# multiProcessorCount: %d", deviceProp.multiProcessorCount);
       while ((block_size < max_block_size) &&
              (graph_degree * num_parents * team_size >= block_size * 2) &&
@@ -548,20 +550,20 @@ struct search : public search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
     // Allocate memory for intermediate buffer and workspace.
     //
     uint32_t num_intermediate_results = num_cta_per_query * itopk_size;
-    intermediate_indices.resize(num_intermediate_results, res.get_stream());
-    intermediate_distances.resize(num_intermediate_results, res.get_stream());
+    intermediate_indices.resize(num_intermediate_results, resource::get_cuda_stream(res));
+    intermediate_distances.resize(num_intermediate_results, resource::get_cuda_stream(res));
 
-    hashmap.resize(hashmap_size, res.get_stream());
+    hashmap.resize(hashmap_size, resource::get_cuda_stream(res));
 
     topk_workspace_size = _cuann_find_topk_bufferSize(
       topk, max_queries, num_intermediate_results, utils::get_cuda_data_type<DATA_T>());
     RAFT_LOG_DEBUG("# topk_workspace_size: %lu", topk_workspace_size);
-    topk_workspace.resize(topk_workspace_size, res.get_stream());
+    topk_workspace.resize(topk_workspace_size, resource::get_cuda_stream(res));
   }
 
   ~search() {}
 
-  void operator()(raft::device_resources const& res,
+  void operator()(raft::resources const& res,
                   raft::device_matrix_view<const DATA_T, INDEX_T, row_major> dataset,
                   raft::device_matrix_view<const INDEX_T, INDEX_T, row_major> graph,
                   INDEX_T* const topk_indices_ptr,          // [num_queries, topk]
@@ -572,7 +574,7 @@ struct search : public search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
                   uint32_t* const num_executed_iterations,  // [num_queries,]
                   uint32_t topk)
   {
-    cudaStream_t stream = res.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(res);
     uint32_t block_size = thread_block_size;
 
     SET_MC_KERNEL;
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
index e3e9c8a655..439ebd563b 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
@@ -23,7 +23,8 @@
 #include <memory>
 #include <numeric>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 #include <vector>
@@ -543,46 +544,48 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
   rmm::device_scalar<uint32_t> terminate_flag;  // dev_terminate_flag, host_terminate_flag.;
   rmm::device_uvector<uint32_t> topk_workspace;
 
-  search(raft::device_resources const& res,
+  search(raft::resources const& res,
          search_params params,
          int64_t dim,
          int64_t graph_degree,
          uint32_t topk)
     : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T>(res, params, dim, graph_degree, topk),
-      result_indices(0, res.get_stream()),
-      result_distances(0, res.get_stream()),
-      parent_node_list(0, res.get_stream()),
-      topk_hint(0, res.get_stream()),
-      topk_workspace(0, res.get_stream()),
-      terminate_flag(res.get_stream())
+      result_indices(0, resource::get_cuda_stream(res)),
+      result_distances(0, resource::get_cuda_stream(res)),
+      parent_node_list(0, resource::get_cuda_stream(res)),
+      topk_hint(0, resource::get_cuda_stream(res)),
+      topk_workspace(0, resource::get_cuda_stream(res)),
+      terminate_flag(resource::get_cuda_stream(res))
   {
     set_params(res);
   }
 
-  void set_params(raft::device_resources const& res)
+  void set_params(raft::resources const& res)
   {
     //
     // Allocate memory for intermediate buffer and workspace.
     //
     result_buffer_size            = itopk_size + (num_parents * graph_degree);
     result_buffer_allocation_size = result_buffer_size + itopk_size;
-    result_indices.resize(result_buffer_allocation_size * max_queries, res.get_stream());
-    result_distances.resize(result_buffer_allocation_size * max_queries, res.get_stream());
+    result_indices.resize(result_buffer_allocation_size * max_queries,
+                          resource::get_cuda_stream(res));
+    result_distances.resize(result_buffer_allocation_size * max_queries,
+                            resource::get_cuda_stream(res));
 
-    parent_node_list.resize(max_queries * num_parents, res.get_stream());
-    topk_hint.resize(max_queries, res.get_stream());
+    parent_node_list.resize(max_queries * num_parents, resource::get_cuda_stream(res));
+    topk_hint.resize(max_queries, resource::get_cuda_stream(res));
 
     size_t topk_workspace_size = _cuann_find_topk_bufferSize(
       itopk_size, max_queries, result_buffer_size, utils::get_cuda_data_type<DATA_T>());
     RAFT_LOG_DEBUG("# topk_workspace_size: %lu", topk_workspace_size);
-    topk_workspace.resize(topk_workspace_size, res.get_stream());
+    topk_workspace.resize(topk_workspace_size, resource::get_cuda_stream(res));
 
-    hashmap.resize(hashmap_size, res.get_stream());
+    hashmap.resize(hashmap_size, resource::get_cuda_stream(res));
   }
 
   ~search() {}
 
-  void operator()(raft::device_resources const& res,
+  void operator()(raft::resources const& res,
                   raft::device_matrix_view<const DATA_T, INDEX_T, row_major> dataset,
                   raft::device_matrix_view<const INDEX_T, INDEX_T, row_major> graph,
                   INDEX_T* const topk_indices_ptr,          // [num_queries, topk]
@@ -594,7 +597,7 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
                   uint32_t topk)
   {
     // Init hashmap
-    cudaStream_t stream      = res.get_stream();
+    cudaStream_t stream      = resource::get_cuda_stream(res);
     const uint32_t hash_size = hashmap::get_size(hash_bitlen);
     set_value_batch(
       hashmap.data(), hash_size, utils::get_max_value<uint32_t>(), hash_size, num_queries, stream);
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
index 09d5e71254..b573d7d7ca 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
@@ -17,11 +17,12 @@
 #pragma once
 
 #include "hashmap.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 // #include "search_single_cta.cuh"
 // #include "topk_for_cagra/topk_core.cuh"
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/neighbors/cagra_types.hpp>
 #include <raft/util/pow2_utils.cuh>
 
@@ -84,28 +85,28 @@ struct search_plan_impl : public search_plan_impl_base {
   rmm::device_uvector<uint32_t> num_executed_iterations;  // device or managed?
   rmm::device_uvector<uint32_t> dev_seed;                 // IdxT
 
-  search_plan_impl(raft::device_resources const& res,
+  search_plan_impl(raft::resources const& res,
                    search_params params,
                    int64_t dim,
                    int64_t graph_degree,
                    uint32_t topk)
     : search_plan_impl_base(params, dim, graph_degree, topk),
-      hashmap(0, res.get_stream()),
-      num_executed_iterations(0, res.get_stream()),
-      dev_seed(0, res.get_stream()),
+      hashmap(0, resource::get_cuda_stream(res)),
+      num_executed_iterations(0, resource::get_cuda_stream(res)),
+      dev_seed(0, resource::get_cuda_stream(res)),
       num_seeds(0)
   {
     adjust_search_params();
     check_params();
     calc_hashmap_params(res);
     set_max_dim_team(dim);
-    num_executed_iterations.resize(max_queries, res.get_stream());
+    num_executed_iterations.resize(max_queries, resource::get_cuda_stream(res));
     RAFT_LOG_DEBUG("# algo = %d", static_cast<int>(algo));
   }
 
   virtual ~search_plan_impl() {}
 
-  virtual void operator()(raft::device_resources const& res,
+  virtual void operator()(raft::resources const& res,
                           raft::device_matrix_view<const DATA_T, INDEX_T, row_major> dataset,
                           raft::device_matrix_view<const INDEX_T, INDEX_T, row_major> graph,
                           INDEX_T* const result_indices_ptr,       // [num_queries, topk]
@@ -144,7 +145,7 @@ struct search_plan_impl : public search_plan_impl_base {
   }
 
   // defines hash_bitlen, small_hash_bitlen, small_hash_reset interval, hash_size
-  inline void calc_hashmap_params(raft::device_resources const& res)
+  inline void calc_hashmap_params(raft::resources const& res)
   {
     // for multipel CTA search
     uint32_t mc_num_cta_per_query = 0;
@@ -317,7 +318,7 @@ struct search_plan_impl : public search_plan_impl_base {
 
 // template <class DATA_T, class DISTANCE_T, class INDEX_T>
 // struct search_plan {
-//   search_plan(raft::device_resources const& res,
+//   search_plan(raft::resources const& res,
 //               search_params param,
 //               int64_t dim,
 //               int64_t graph_degree)
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
index 531b30ba85..d64afb0d11 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
@@ -23,7 +23,9 @@
 #include <memory>
 #include <numeric>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_properties.hpp>
+#include <raft/core/resources.hpp>
 #include <rmm/device_uvector.hpp>
 #include <vector>
 
@@ -962,7 +964,7 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
 
   uint32_t num_itopk_candidates;
 
-  search(raft::device_resources const& res,
+  search(raft::resources const& res,
          search_params params,
          int64_t dim,
          int64_t graph_degree,
@@ -974,7 +976,7 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
 
   ~search() {}
 
-  inline void set_params(raft::device_resources const& res)
+  inline void set_params(raft::resources const& res)
   {
     num_itopk_candidates = num_parents * graph_degree;
     result_buffer_size   = itopk_size + num_itopk_candidates;
@@ -1036,7 +1038,7 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
 
       // Increase block size to improve GPU occupancy when batch size
       // is small, that is, number of queries is low.
-      cudaDeviceProp deviceProp = res.get_device_properties();
+      cudaDeviceProp deviceProp = resource::get_device_properties(res);
       RAFT_LOG_DEBUG("# multiProcessorCount: %d", deviceProp.multiProcessorCount);
       while ((block_size < max_block_size) &&
              (graph_degree * num_parents * team_size >= block_size * 2) &&
@@ -1104,12 +1106,12 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
     hashmap_size = 0;
     if (small_hash_bitlen == 0) {
       hashmap_size = sizeof(uint32_t) * max_queries * hashmap::get_size(hash_bitlen);
-      hashmap.resize(hashmap_size, res.get_stream());
+      hashmap.resize(hashmap_size, resource::get_cuda_stream(res));
     }
     RAFT_LOG_DEBUG("# hashmap_size: %lu", hashmap_size);
   }
 
-  void operator()(raft::device_resources const& res,
+  void operator()(raft::resources const& res,
                   raft::device_matrix_view<const DATA_T, INDEX_T, row_major> dataset,
                   raft::device_matrix_view<const INDEX_T, INDEX_T, row_major> graph,
                   INDEX_T* const result_indices_ptr,             // [num_queries, topk]
@@ -1120,7 +1122,7 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
                   std::uint32_t* const num_executed_iterations,  // [num_queries]
                   uint32_t topk)
   {
-    cudaStream_t stream = res.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(res);
     uint32_t block_size = thread_block_size;
     SET_KERNEL;
     RAFT_CUDA_TRY(
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh
index bf7248b983..7c2fa05bfe 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh
@@ -17,11 +17,12 @@
 #pragma once
 
 #include <raft/cluster/kmeans_balanced.cuh>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/mdarray.hpp>
 #include <raft/core/nvtx.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/add.cuh>
 #include <raft/linalg/map.cuh>
 #include <raft/linalg/norm.cuh>
@@ -41,9 +42,9 @@ namespace raft::neighbors::ivf_flat::detail {
 using namespace raft::spatial::knn::detail;  // NOLINT
 
 template <typename T, typename IdxT>
-auto clone(const raft::device_resources& res, const index<T, IdxT>& source) -> index<T, IdxT>
+auto clone(const raft::resources& res, const index<T, IdxT>& source) -> index<T, IdxT>
 {
-  auto stream = res.get_stream();
+  auto stream = resource::get_cuda_stream(res);
 
   // Allocate the new index
   index<T, IdxT> target(res,
@@ -156,7 +157,7 @@ __global__ void build_index_kernel(const LabelT* labels,
 
 /** See raft::neighbors::ivf_flat::extend docs */
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             index<T, IdxT>* index,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -165,7 +166,7 @@ void extend(raft::device_resources const& handle,
   using LabelT = uint32_t;
   RAFT_EXPECTS(index != nullptr, "index cannot be empty.");
 
-  auto stream  = handle.get_stream();
+  auto stream  = resource::get_cuda_stream(handle);
   auto n_lists = index->n_lists();
   auto dim     = index->dim();
   list_spec<uint32_t, T, IdxT> list_device_spec{index->dim(),
@@ -226,7 +227,7 @@ void extend(raft::device_resources const& handle,
   {
     copy(old_list_sizes.data(), old_list_sizes_dev.data_handle(), n_lists, stream);
     copy(new_list_sizes.data(), list_sizes_ptr, n_lists, stream);
-    handle.sync_stream();
+    resource::sync_stream(handle);
     auto& lists = index->lists();
     for (uint32_t label = 0; label < n_lists; label++) {
       ivf::resize_list(handle,
@@ -283,7 +284,7 @@ void extend(raft::device_resources const& handle,
 
 /** See raft::neighbors::ivf_flat::extend docs */
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             const index<T, IdxT>& orig_index,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -296,13 +297,13 @@ auto extend(raft::device_resources const& handle,
 
 /** See raft::neighbors::ivf_flat::build docs */
 template <typename T, typename IdxT>
-inline auto build(raft::device_resources const& handle,
+inline auto build(raft::resources const& handle,
                   const index_params& params,
                   const T* dataset,
                   IdxT n_rows,
                   uint32_t dim) -> index<T, IdxT>
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "ivf_flat::build(%zu, %u)", size_t(n_rows), dim);
   static_assert(std::is_same_v<T, float> || std::is_same_v<T, uint8_t> || std::is_same_v<T, int8_t>,
@@ -365,7 +366,7 @@ inline auto build(raft::device_resources const& handle,
  * @param[in] n_candidates  of neighbor_candidates
  */
 template <typename T, typename IdxT>
-inline void fill_refinement_index(raft::device_resources const& handle,
+inline void fill_refinement_index(raft::resources const& handle,
                                   index<T, IdxT>* refinement_index,
                                   const T* dataset,
                                   const IdxT* candidate_idx,
@@ -374,7 +375,7 @@ inline void fill_refinement_index(raft::device_resources const& handle,
 {
   using LabelT = uint32_t;
 
-  auto stream      = handle.get_stream();
+  auto stream      = resource::get_cuda_stream(handle);
   uint32_t n_lists = n_queries;
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "ivf_flat::fill_refinement_index(%zu, %u)", size_t(n_queries));
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh
index 14d15711a6..b97e64a259 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh
@@ -25,7 +25,7 @@
 namespace raft::neighbors::ivf_flat::detail {
 
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const raft::neighbors::ivf_flat::index<T, IdxT>& index,
             const T* queries,
@@ -41,7 +41,7 @@ void search(raft::device_resources const& handle,
 
 #define instantiate_raft_neighbors_ivf_flat_detail_search(T, IdxT)         \
   extern template void raft::neighbors::ivf_flat::detail::search<T, IdxT>( \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     const search_params& params,                                           \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,                \
     const T* queries,                                                      \
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
index c364118fdd..b4711fa14b 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
@@ -16,8 +16,9 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>                       // raft::device_resources
 #include <raft/core/logger.hpp>                                 // RAFT_LOG_TRACE
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>                              // raft::resources
 #include <raft/distance/distance_types.hpp>                     // is_min_close, DistanceType
 #include <raft/linalg/gemm.cuh>                                 // raft::linalg::gemm
 #include <raft/linalg/norm.cuh>                                 // raft::linalg::norm
@@ -33,7 +34,7 @@ namespace raft::neighbors::ivf_flat::detail {
 using namespace raft::spatial::knn::detail;  // NOLINT
 
 template <typename T, typename AccT, typename IdxT>
-void search_impl(raft::device_resources const& handle,
+void search_impl(raft::resources const& handle,
                  const raft::neighbors::ivf_flat::index<T, IdxT>& index,
                  const T* queries,
                  uint32_t n_queries,
@@ -44,7 +45,7 @@ void search_impl(raft::device_resources const& handle,
                  AccT* distances,
                  rmm::mr::device_memory_resource* search_mr)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   // The norm of query
   rmm::device_uvector<float> query_norm_dev(n_queries, stream, search_mr);
   // The distance value of cluster(list) and queries
@@ -196,7 +197,7 @@ void search_impl(raft::device_resources const& handle,
 
 /** See raft::neighbors::ivf_flat::search docs */
 template <typename T, typename IdxT>
-inline void search(raft::device_resources const& handle,
+inline void search(raft::resources const& handle,
                    const search_params& params,
                    const index<T, IdxT>& index,
                    const T* queries,
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
index bec3b890eb..af2e6ba0f8 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/mdarray.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/serialize.hpp>
 #include <raft/neighbors/ivf_flat_types.hpp>
 #include <raft/neighbors/ivf_list.hpp>
@@ -56,7 +57,7 @@ template struct check_index_layout<sizeof(index<double, std::uint64_t>), 296>;
  *
  */
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& handle, std::ostream& os, const index<T, IdxT>& index_)
+void serialize(raft::resources const& handle, std::ostream& os, const index<T, IdxT>& index_)
 {
   RAFT_LOG_DEBUG(
     "Saving IVF-Flat index, size %zu, dim %u", static_cast<size_t>(index_.size()), index_.dim());
@@ -81,8 +82,8 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
   copy(sizes_host.data_handle(),
        index_.list_sizes().data_handle(),
        sizes_host.size(),
-       handle.get_stream());
-  handle.sync_stream();
+       resource::get_cuda_stream(handle));
+  resource::sync_stream(handle);
   serialize_mdspan(handle, os, sizes_host.view());
 
   list_spec<uint32_t, T, IdxT> list_store_spec{index_.dim(), true};
@@ -93,11 +94,11 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
                         list_store_spec,
                         Pow2<kIndexGroupSize>::roundUp(sizes_host(label)));
   }
-  handle.sync_stream();
+  resource::sync_stream(handle);
 }
 
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& handle,
+void serialize(raft::resources const& handle,
                const std::string& filename,
                const index<T, IdxT>& index_)
 {
@@ -120,7 +121,7 @@ void serialize(raft::device_resources const& handle,
  *
  */
 template <typename T, typename IdxT>
-auto deserialize(raft::device_resources const& handle, std::istream& is) -> index<T, IdxT>
+auto deserialize(raft::resources const& handle, std::istream& is) -> index<T, IdxT>
 {
   auto ver = deserialize_scalar<int>(handle, is);
   if (ver != serialization_version) {
@@ -153,7 +154,7 @@ auto deserialize(raft::device_resources const& handle, std::istream& is) -> inde
   for (uint32_t label = 0; label < index_.n_lists(); label++) {
     ivf::deserialize_list(handle, is, index_.lists()[label], list_store_spec, list_device_spec);
   }
-  handle.sync_stream();
+  resource::sync_stream(handle);
 
   index_.recompute_internal_state(handle);
 
@@ -161,8 +162,7 @@ auto deserialize(raft::device_resources const& handle, std::istream& is) -> inde
 }
 
 template <typename T, typename IdxT>
-auto deserialize(raft::device_resources const& handle, const std::string& filename)
-  -> index<T, IdxT>
+auto deserialize(raft::resources const& handle, const std::string& filename) -> index<T, IdxT>
 {
   std::ifstream is(filename, std::ios::in | std::ios::binary);
 
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
index 53d8823eea..4a54d33a02 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
@@ -16,6 +16,8 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
 #include <raft/neighbors/detail/ivf_pq_codepacking.cuh>
@@ -24,10 +26,10 @@
 
 #include <raft/cluster/kmeans_balanced.cuh>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/nvtx.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/add.cuh>
 #include <raft/linalg/detail/qr.cuh>
@@ -116,7 +118,7 @@ void copy_warped(T* out,
  * @param[out] rotation_matrix device pointer to a row-major matrix of size [n_rows, n_cols].
  * @param rng random number generator state
  */
-inline void make_rotation_matrix(raft::device_resources const& handle,
+inline void make_rotation_matrix(raft::resources const& handle,
                                  bool force_random_rotation,
                                  uint32_t n_rows,
                                  uint32_t n_cols,
@@ -125,7 +127,7 @@ inline void make_rotation_matrix(raft::device_resources const& handle,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "ivf_pq::make_rotation_matrix(%u * %u)", n_rows, n_cols);
-  auto stream  = handle.get_stream();
+  auto stream  = resource::get_cuda_stream(handle);
   bool inplace = n_rows == n_cols;
   uint32_t n   = std::max(n_rows, n_cols);
   if (force_random_rotation || !inplace) {
@@ -160,7 +162,7 @@ inline void make_rotation_matrix(raft::device_resources const& handle,
  *
  */
 template <typename T, typename IdxT>
-void select_residuals(raft::device_resources const& handle,
+void select_residuals(raft::resources const& handle,
                       float* residuals,
                       IdxT n_rows,
                       uint32_t dim,
@@ -173,7 +175,7 @@ void select_residuals(raft::device_resources const& handle,
 
 )
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   rmm::device_uvector<float> tmp(size_t(n_rows) * size_t(dim), stream, device_memory);
   // Note: the number of rows of the input dataset isn't actually n_rows, but matrix::gather doesn't
   // need to know it, any strictly positive number would work.
@@ -216,7 +218,7 @@ void select_residuals(raft::device_resources const& handle,
  */
 template <typename T, typename IdxT>
 void flat_compute_residuals(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   float* residuals,                                                      // [n_rows, rot_dim]
   IdxT n_rows,
   device_matrix_view<const float, uint32_t, row_major> rotation_matrix,  // [rot_dim, dim]
@@ -225,7 +227,7 @@ void flat_compute_residuals(
   std::variant<uint32_t, const uint32_t*> labels,                        // [n_rows]
   rmm::mr::device_memory_resource* device_memory)
 {
-  auto stream  = handle.get_stream();
+  auto stream  = resource::get_cuda_stream(handle);
   auto dim     = rotation_matrix.extent(1);
   auto rot_dim = rotation_matrix.extent(0);
   rmm::device_uvector<float> tmp(n_rows * dim, stream, device_memory);
@@ -313,11 +315,11 @@ auto calculate_offsets_and_indices(IdxT n_rows,
 }
 
 template <typename IdxT>
-void transpose_pq_centers(const device_resources& handle,
+void transpose_pq_centers(const resources& handle,
                           index<IdxT>& index,
                           const float* pq_centers_source)
 {
-  auto stream  = handle.get_stream();
+  auto stream  = resource::get_cuda_stream(handle);
   auto extents = index.pq_centers().extents();
   static_assert(extents.rank() == 3);
   auto extents_source =
@@ -338,7 +340,7 @@ void transpose_pq_centers(const device_resources& handle,
 }
 
 template <typename IdxT>
-void train_per_subset(raft::device_resources const& handle,
+void train_per_subset(raft::resources const& handle,
                       index<IdxT>& index,
                       size_t n_rows,
                       const float* trainset,   // [n_rows, dim]
@@ -347,7 +349,7 @@ void train_per_subset(raft::device_resources const& handle,
                       rmm::mr::device_memory_resource* managed_memory,
                       rmm::mr::device_memory_resource* device_memory)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   rmm::device_uvector<float> pq_centers_tmp(index.pq_centers().size(), stream, device_memory);
   rmm::device_uvector<float> sub_trainset(n_rows * size_t(index.pq_len()), stream, device_memory);
@@ -391,7 +393,8 @@ void train_per_subset(raft::device_resources const& handle,
                  stream);
 
     // clone the handle and attached the device memory resource to it
-    const device_resources new_handle(handle, device_memory);
+    const resources new_handle(handle);
+    resource::set_workspace_resource(new_handle, device_memory);
 
     // train PQ codebook for this subspace
     auto sub_trainset_view =
@@ -418,7 +421,7 @@ void train_per_subset(raft::device_resources const& handle,
 }
 
 template <typename IdxT>
-void train_per_cluster(raft::device_resources const& handle,
+void train_per_cluster(raft::resources const& handle,
                        index<IdxT>& index,
                        size_t n_rows,
                        const float* trainset,   // [n_rows, dim]
@@ -427,7 +430,7 @@ void train_per_cluster(raft::device_resources const& handle,
                        rmm::mr::device_memory_resource* managed_memory,
                        rmm::mr::device_memory_resource* device_memory)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   rmm::device_uvector<float> pq_centers_tmp(index.pq_centers().size(), stream, device_memory);
   rmm::device_uvector<uint32_t> cluster_sizes(index.n_lists(), stream, managed_memory);
@@ -453,7 +456,7 @@ void train_per_cluster(raft::device_resources const& handle,
   rmm::device_uvector<float> rot_vectors(
     size_t(max_cluster_size) * size_t(index.rot_dim()), stream, device_memory);
 
-  handle.sync_stream();  // make sure cluster offsets are up-to-date
+  resource::sync_stream(handle);  // make sure cluster offsets are up-to-date
   for (uint32_t l = 0; l < index.n_lists(); l++) {
     auto cluster_size = cluster_sizes.data()[l];
     if (cluster_size == 0) continue;
@@ -472,7 +475,8 @@ void train_per_cluster(raft::device_resources const& handle,
                      device_memory);
 
     // clone the handle and attached the device memory resource to it
-    const device_resources new_handle(handle, device_memory);
+    const resources new_handle(handle);
+    resource::set_workspace_resource(new_handle, device_memory);
 
     // limit the cluster size to bound the training time.
     // [sic] we interpret the data as pq_len-dimensional
@@ -605,7 +609,7 @@ inline void unpack_list_data(
 
 /** Unpack the list data; see the public interface for the api and usage. */
 template <typename IdxT>
-void unpack_list_data(raft::device_resources const& res,
+void unpack_list_data(raft::resources const& res,
                       const index<IdxT>& index,
                       device_matrix_view<uint8_t, uint32_t, row_major> out_codes,
                       uint32_t label,
@@ -615,7 +619,7 @@ void unpack_list_data(raft::device_resources const& res,
                    index.lists()[label]->data.view(),
                    offset_or_indices,
                    index.pq_bits(),
-                   res.get_stream());
+                   resource::get_cuda_stream(res));
 }
 
 /** A consumer for the `run_on_list` and `run_on_vector` that approximates the original input data.
@@ -693,7 +697,7 @@ __launch_bounds__(BlockSize) __global__ void reconstruct_list_data_kernel(
 
 /** Decode the list data; see the public interface for the api and usage. */
 template <typename T, typename IdxT>
-void reconstruct_list_data(raft::device_resources const& res,
+void reconstruct_list_data(raft::resources const& res,
                            const index<IdxT>& index,
                            device_matrix_view<T, uint32_t, row_major> out_vectors,
                            uint32_t label,
@@ -711,7 +715,7 @@ void reconstruct_list_data(raft::device_resources const& res,
   }
 
   auto tmp = make_device_mdarray<float>(
-    res, res.get_workspace_resource(), make_extents<uint32_t>(n_rows, index.rot_dim()));
+    res, resource::get_workspace_resource(res), make_extents<uint32_t>(n_rows, index.rot_dim()));
 
   constexpr uint32_t kBlockSize = 256;
   dim3 blocks(div_rounding_up_safe<uint32_t>(n_rows, kBlockSize), 1, 1);
@@ -726,21 +730,22 @@ void reconstruct_list_data(raft::device_resources const& res,
       default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
     }
   }(index.pq_bits());
-  kernel<<<blocks, threads, 0, res.get_stream()>>>(tmp.view(),
-                                                   list->data.view(),
-                                                   index.pq_centers(),
-                                                   index.centers_rot(),
-                                                   index.codebook_kind(),
-                                                   label,
-                                                   offset_or_indices);
+  kernel<<<blocks, threads, 0, resource::get_cuda_stream(res)>>>(tmp.view(),
+                                                                 list->data.view(),
+                                                                 index.pq_centers(),
+                                                                 index.centers_rot(),
+                                                                 index.codebook_kind(),
+                                                                 label,
+                                                                 offset_or_indices);
   RAFT_CUDA_TRY(cudaPeekAtLastError());
 
   float* out_float_ptr = nullptr;
-  rmm::device_uvector<float> out_float_buf(0, res.get_stream(), res.get_workspace_resource());
+  rmm::device_uvector<float> out_float_buf(
+    0, resource::get_cuda_stream(res), resource::get_workspace_resource(res));
   if constexpr (std::is_same_v<T, float>) {
     out_float_ptr = out_vectors.data_handle();
   } else {
-    out_float_buf.resize(size_t{n_rows} * size_t{index.dim()}, res.get_stream());
+    out_float_buf.resize(size_t{n_rows} * size_t{index.dim()}, resource::get_cuda_stream(res));
     out_float_ptr = out_float_buf.data();
   }
   // Rotate the results back to the original space
@@ -760,7 +765,7 @@ void reconstruct_list_data(raft::device_resources const& res,
                &beta,
                out_float_ptr,
                index.dim(),
-               res.get_stream());
+               resource::get_cuda_stream(res));
   // Transform the data to the original type, if necessary
   if constexpr (!std::is_same_v<T, float>) {
     linalg::map(res,
@@ -841,7 +846,7 @@ inline void pack_list_data(
 }
 
 template <typename IdxT>
-void pack_list_data(raft::device_resources const& res,
+void pack_list_data(raft::resources const& res,
                     index<IdxT>* index,
                     device_matrix_view<const uint8_t, uint32_t, row_major> new_codes,
                     uint32_t label,
@@ -851,7 +856,7 @@ void pack_list_data(raft::device_resources const& res,
                  new_codes,
                  offset_or_indices,
                  index->pq_bits(),
-                 res.get_stream());
+                 resource::get_cuda_stream(res));
 }
 
 /**
@@ -1007,7 +1012,7 @@ __launch_bounds__(BlockSize) __global__ void encode_list_data_kernel(
 }
 
 template <typename T, typename IdxT>
-void encode_list_data(raft::device_resources const& res,
+void encode_list_data(raft::resources const& res,
                       index<IdxT>* index,
                       device_matrix_view<const T, uint32_t, row_major> new_vectors,
                       uint32_t label,
@@ -1016,7 +1021,7 @@ void encode_list_data(raft::device_resources const& res,
   auto n_rows = new_vectors.extent(0);
   if (n_rows == 0) { return; }
 
-  auto mr = res.get_workspace_resource();
+  auto mr = resource::get_workspace_resource(res);
 
   auto new_vectors_residual =
     make_device_mdarray<float>(res, mr, make_extents<uint32_t>(n_rows, index->rot_dim()));
@@ -1044,12 +1049,12 @@ void encode_list_data(raft::device_resources const& res,
       default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
     }
   }(index->pq_bits());
-  kernel<<<blocks, threads, 0, res.get_stream()>>>(index->lists()[label]->data.view(),
-                                                   new_vectors_residual.view(),
-                                                   index->pq_centers(),
-                                                   index->codebook_kind(),
-                                                   label,
-                                                   offset_or_indices);
+  kernel<<<blocks, threads, 0, resource::get_cuda_stream(res)>>>(index->lists()[label]->data.view(),
+                                                                 new_vectors_residual.view(),
+                                                                 index->pq_centers(),
+                                                                 index->codebook_kind(),
+                                                                 label,
+                                                                 offset_or_indices);
   RAFT_CUDA_TRY(cudaPeekAtLastError());
 }
 
@@ -1081,7 +1086,7 @@ void encode_list_data(raft::device_resources const& res,
  *    a memory resource to use for device allocations
  */
 template <typename T, typename IdxT>
-void process_and_fill_codes(raft::device_resources const& handle,
+void process_and_fill_codes(raft::resources const& handle,
                             index<IdxT>& index,
                             const T* new_vectors,
                             std::variant<IdxT, const IdxT*> src_offset_or_indices,
@@ -1115,23 +1120,23 @@ void process_and_fill_codes(raft::device_resources const& handle,
       default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits);
     }
   }(index.pq_bits());
-  kernel<<<blocks, threads, 0, handle.get_stream()>>>(new_vectors_residual.view(),
-                                                      src_offset_or_indices,
-                                                      new_labels,
-                                                      index.list_sizes(),
-                                                      index.inds_ptrs(),
-                                                      index.data_ptrs(),
-                                                      index.pq_centers(),
-                                                      index.codebook_kind());
+  kernel<<<blocks, threads, 0, resource::get_cuda_stream(handle)>>>(new_vectors_residual.view(),
+                                                                    src_offset_or_indices,
+                                                                    new_labels,
+                                                                    index.list_sizes(),
+                                                                    index.inds_ptrs(),
+                                                                    index.data_ptrs(),
+                                                                    index.pq_centers(),
+                                                                    index.codebook_kind());
   RAFT_CUDA_TRY(cudaPeekAtLastError());
 }
 
 /** Update the state of the dependent index members. */
 template <typename IdxT>
-void recompute_internal_state(const raft::device_resources& res, index<IdxT>& index)
+void recompute_internal_state(const raft::resources& res, index<IdxT>& index)
 {
-  auto stream  = res.get_stream();
-  auto tmp_res = res.get_workspace_resource();
+  auto stream  = resource::get_cuda_stream(res);
+  auto tmp_res = resource::get_workspace_resource(res);
   rmm::device_uvector<uint32_t> sorted_sizes(index.n_lists(), stream, tmp_res);
 
   // Actualize the list pointers
@@ -1169,7 +1174,7 @@ void recompute_internal_state(const raft::device_resources& res, index<IdxT>& in
   // copy the results to CPU
   std::vector<uint32_t> sorted_sizes_host(index.n_lists());
   copy(sorted_sizes_host.data(), sorted_sizes.data(), index.n_lists(), stream);
-  res.sync_stream();
+  resource::sync_stream(res);
 
   // accumulate the sorted cluster sizes
   auto accum_sorted_sizes = index.accum_sorted_sizes();
@@ -1186,7 +1191,7 @@ void recompute_internal_state(const raft::device_resources& res, index<IdxT>& in
  * @return offset for writing the data
  */
 template <typename IdxT>
-auto extend_list_prepare(raft::device_resources const& res,
+auto extend_list_prepare(raft::resources const& res,
                          index<IdxT>* index,
                          device_vector_view<const IdxT, uint32_t, row_major> new_indices,
                          uint32_t label) -> uint32_t
@@ -1194,15 +1199,18 @@ auto extend_list_prepare(raft::device_resources const& res,
   uint32_t n_rows = new_indices.extent(0);
   uint32_t offset;
   // Allocate the lists to fit the new data
-  copy(&offset, index->list_sizes().data_handle() + label, 1, res.get_stream());
-  res.sync_stream();
+  copy(&offset, index->list_sizes().data_handle() + label, 1, resource::get_cuda_stream(res));
+  resource::sync_stream(res);
   uint32_t new_size = offset + n_rows;
-  copy(index->list_sizes().data_handle() + label, &new_size, 1, res.get_stream());
+  copy(index->list_sizes().data_handle() + label, &new_size, 1, resource::get_cuda_stream(res));
   auto spec = list_spec<uint32_t, IdxT>{
     index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()};
   auto& list = index->lists()[label];
   ivf::resize_list(res, list, spec, new_size, offset);
-  copy(list->indices.data_handle() + offset, new_indices.data_handle(), n_rows, res.get_stream());
+  copy(list->indices.data_handle() + offset,
+       new_indices.data_handle(),
+       n_rows,
+       resource::get_cuda_stream(res));
   return offset;
 }
 
@@ -1212,7 +1220,7 @@ auto extend_list_prepare(raft::device_resources const& res,
  * See the public interface for the api and usage.
  */
 template <typename IdxT>
-void extend_list_with_codes(raft::device_resources const& res,
+void extend_list_with_codes(raft::resources const& res,
                             index<IdxT>* index,
                             device_matrix_view<const uint8_t, uint32_t, row_major> new_codes,
                             device_vector_view<const IdxT, uint32_t, row_major> new_indices,
@@ -1231,7 +1239,7 @@ void extend_list_with_codes(raft::device_resources const& res,
  * See the public interface for the api and usage.
  */
 template <typename T, typename IdxT>
-void extend_list(raft::device_resources const& res,
+void extend_list(raft::resources const& res,
                  index<IdxT>* index,
                  device_matrix_view<const T, uint32_t, row_major> new_vectors,
                  device_vector_view<const IdxT, uint32_t, row_major> new_indices,
@@ -1250,19 +1258,19 @@ void extend_list(raft::device_resources const& res,
  * See the public interface for the api and usage.
  */
 template <typename IdxT>
-void erase_list(raft::device_resources const& res, index<IdxT>* index, uint32_t label)
+void erase_list(raft::resources const& res, index<IdxT>* index, uint32_t label)
 {
   uint32_t zero = 0;
-  copy(index->list_sizes().data_handle() + label, &zero, 1, res.get_stream());
+  copy(index->list_sizes().data_handle() + label, &zero, 1, resource::get_cuda_stream(res));
   index->lists()[label].reset();
   recompute_internal_state(res, *index);
 }
 
 /** Copy the state of an index into a new index, but share the list data among the two. */
 template <typename IdxT>
-auto clone(const raft::device_resources& res, const index<IdxT>& source) -> index<IdxT>
+auto clone(const raft::resources& res, const index<IdxT>& source) -> index<IdxT>
 {
-  auto stream = res.get_stream();
+  auto stream = resource::get_cuda_stream(res);
 
   // Allocate the new index
   index<IdxT> target(res,
@@ -1309,7 +1317,7 @@ auto clone(const raft::device_resources& res, const index<IdxT>& source) -> inde
  * See raft::spatial::knn::ivf_pq::extend docs.
  */
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             index<IdxT>* index,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -1317,7 +1325,7 @@ void extend(raft::device_resources const& handle,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "ivf_pq::extend(%zu, %u)", size_t(n_rows), index->dim());
-  auto stream           = handle.get_stream();
+  auto stream           = resource::get_cuda_stream(handle);
   const auto n_clusters = index->n_lists();
 
   RAFT_EXPECTS(new_indices != nullptr || index->size() == 0,
@@ -1458,7 +1466,7 @@ void extend(raft::device_resources const& handle,
     std::vector<uint32_t> old_cluster_sizes(n_clusters);
     copy(new_cluster_sizes.data(), list_sizes, n_clusters, stream);
     copy(old_cluster_sizes.data(), orig_list_sizes.data(), n_clusters, stream);
-    handle.sync_stream();
+    resource::sync_stream(handle);
     for (uint32_t label = 0; label < n_clusters; label++) {
       ivf::resize_list(
         handle, index->lists()[label], spec, new_cluster_sizes[label], old_cluster_sizes[label]);
@@ -1494,7 +1502,7 @@ void extend(raft::device_resources const& handle,
  * See raft::spatial::knn::ivf_pq::extend docs.
  */
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             const index<IdxT>& orig_index,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -1507,7 +1515,7 @@ auto extend(raft::device_resources const& handle,
 
 /** See raft::spatial::knn::ivf_pq::build docs */
 template <typename T, typename IdxT>
-auto build(raft::device_resources const& handle,
+auto build(raft::resources const& handle,
            const index_params& params,
            const T* dataset,
            IdxT n_rows,
@@ -1520,7 +1528,7 @@ auto build(raft::device_resources const& handle,
 
   RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset");
 
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   index<IdxT> index(handle, params, dim);
   utils::memzero(
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh
index 87f9bfb622..8a4d3277da 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_fp_8bit.cuh
@@ -22,10 +22,10 @@
 
 #include <raft/core/cudart_utils.hpp>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/nvtx.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/gemm.cuh>
 #include <raft/linalg/map.cuh>
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
index c1c15d3424..149ea52b6a 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
@@ -16,6 +16,8 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_properties.hpp>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
 #include <raft/neighbors/detail/ivf_pq_compute_similarity.cuh>
@@ -25,10 +27,10 @@
 
 #include <raft/core/cudart_utils.hpp>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/core/nvtx.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/gemm.cuh>
 #include <raft/linalg/map.cuh>
@@ -62,7 +64,7 @@ using namespace raft::spatial::knn::detail;  // NOLINT
  * scores here.
  */
 template <typename T>
-void select_clusters(raft::device_resources const& handle,
+void select_clusters(raft::resources const& handle,
                      uint32_t* clusters_to_probe,  // [n_queries, n_probes]
                      float* float_queries,         // [n_queries, dim_ext]
                      uint32_t n_queries,
@@ -75,7 +77,7 @@ void select_clusters(raft::device_resources const& handle,
                      const float* cluster_centers,  // [n_lists, dim_ext]
                      rmm::mr::device_memory_resource* mr)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   /* NOTE[qc_distances]
 
   We compute query-center distances to choose the clusters to probe.
@@ -413,7 +415,7 @@ constexpr inline auto expected_probe_coresidency(uint32_t n_clusters,
  *      is guaranteed to fit into GPU memory.
  */
 template <typename ScoreT, typename LutT, typename IdxT>
-void ivfpq_search_worker(raft::device_resources const& handle,
+void ivfpq_search_worker(raft::resources const& handle,
                          const index<IdxT>& index,
                          uint32_t max_samples,
                          uint32_t n_probes,
@@ -427,7 +429,7 @@ void ivfpq_search_worker(raft::device_resources const& handle,
                          double preferred_shmem_carveout,
                          rmm::mr::device_memory_resource* mr)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   bool manage_local_topk = is_local_topk_feasible(topK, n_probes, n_queries);
   auto topk_len          = manage_local_topk ? n_probes * topK : max_samples;
@@ -526,16 +528,17 @@ void ivfpq_search_worker(raft::device_resources const& handle,
     } break;
   }
 
-  auto search_instance = compute_similarity_select<ScoreT, LutT>(handle.get_device_properties(),
-                                                                 manage_local_topk,
-                                                                 coresidency,
-                                                                 preferred_shmem_carveout,
-                                                                 index.pq_bits(),
-                                                                 index.pq_dim(),
-                                                                 precomp_data_count,
-                                                                 n_queries,
-                                                                 n_probes,
-                                                                 topK);
+  auto search_instance =
+    compute_similarity_select<ScoreT, LutT>(resource::get_device_properties(handle),
+                                            manage_local_topk,
+                                            coresidency,
+                                            preferred_shmem_carveout,
+                                            index.pq_bits(),
+                                            index.pq_dim(),
+                                            precomp_data_count,
+                                            n_queries,
+                                            n_probes,
+                                            topK);
 
   rmm::device_uvector<LutT> device_lut(search_instance.device_lut_size, stream, mr);
   std::optional<device_vector<float>> query_kths_buf{std::nullopt};
@@ -710,7 +713,7 @@ inline auto get_max_batch_size(uint32_t k,
 
 /** See raft::spatial::knn::ivf_pq::search docs */
 template <typename T, typename IdxT>
-inline void search(raft::device_resources const& handle,
+inline void search(raft::resources const& handle,
                    const search_params& params,
                    const index<IdxT>& index,
                    const T* queries,
@@ -750,7 +753,7 @@ inline void search(raft::device_resources const& handle,
     default: RAFT_FAIL("all pointers must be accessible from the device.");
   }
 
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   auto dim      = index.dim();
   auto dim_ext  = index.dim_ext();
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh
index 7d70ab9fbe..ff5bd8ef89 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_serialize.cuh
@@ -16,13 +16,14 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/neighbors/detail/ivf_pq_build.cuh>
 #include <raft/neighbors/ivf_list.hpp>
 #include <raft/neighbors/ivf_pq_types.hpp>
 
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/logger.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/core/serialize.hpp>
 
 #include <fstream>
@@ -60,7 +61,7 @@ template struct check_index_layout<sizeof(index<std::uint64_t>), 448>;
  *
  */
 template <typename IdxT>
-void serialize(raft::device_resources const& handle_, std::ostream& os, const index<IdxT>& index)
+void serialize(raft::resources const& handle_, std::ostream& os, const index<IdxT>& index)
 {
   RAFT_LOG_DEBUG("Size %zu, dim %d, pq_dim %d, pq_bits %d",
                  static_cast<size_t>(index.size()),
@@ -88,8 +89,8 @@ void serialize(raft::device_resources const& handle_, std::ostream& os, const in
   copy(sizes_host.data_handle(),
        index.list_sizes().data_handle(),
        sizes_host.size(),
-       handle_.get_stream());
-  handle_.sync_stream();
+       resource::get_cuda_stream(handle_));
+  resource::sync_stream(handle_);
   serialize_mdspan(handle_, os, sizes_host.view());
   auto list_store_spec = list_spec<uint32_t, IdxT>{index.pq_bits(), index.pq_dim(), true};
   for (uint32_t label = 0; label < index.n_lists(); label++) {
@@ -108,7 +109,7 @@ void serialize(raft::device_resources const& handle_, std::ostream& os, const in
  *
  */
 template <typename IdxT>
-void serialize(raft::device_resources const& handle_,
+void serialize(raft::resources const& handle_,
                const std::string& filename,
                const index<IdxT>& index)
 {
@@ -132,7 +133,7 @@ void serialize(raft::device_resources const& handle_,
  *
  */
 template <typename IdxT>
-auto deserialize(raft::device_resources const& handle_, std::istream& is) -> index<IdxT>
+auto deserialize(raft::resources const& handle_, std::istream& is) -> index<IdxT>
 {
   auto ver = deserialize_scalar<int>(handle_, is);
   if (ver != kSerializationVersion) {
@@ -169,7 +170,7 @@ auto deserialize(raft::device_resources const& handle_, std::istream& is) -> ind
     ivf::deserialize_list(handle_, is, list, list_store_spec, list_device_spec);
   }
 
-  handle_.sync_stream();
+  resource::sync_stream(handle_);
 
   recompute_internal_state(handle_, index);
 
@@ -186,7 +187,7 @@ auto deserialize(raft::device_resources const& handle_, std::istream& is) -> ind
  *
  */
 template <typename IdxT>
-auto deserialize(raft::device_resources const& handle_, const std::string& filename) -> index<IdxT>
+auto deserialize(raft::resources const& handle_, const std::string& filename) -> index<IdxT>
 {
   std::ifstream infile(filename, std::ios::in | std::ios::binary);
 
diff --git a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
index 879aafee32..6cb77bac94 100644
--- a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
+++ b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
@@ -16,6 +16,10 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/cuda_stream_pool.hpp>
@@ -24,7 +28,7 @@
 
 #include <cstdint>
 #include <iostream>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/map.cuh>
@@ -51,7 +55,7 @@ using namespace raft::spatial::knn;
 template <typename ElementType      = float,
           typename IndexType        = int64_t,
           typename DistanceEpilogue = raft::identity_op>
-void tiled_brute_force_knn(const raft::device_resources& handle,
+void tiled_brute_force_knn(const raft::resources& handle,
                            const ElementType* search,  // size (m ,d)
                            const ElementType* index,   // size (n ,d)
                            size_t m,
@@ -69,8 +73,8 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
   // Figure out the number of rows/cols to tile for
   size_t tile_rows   = 0;
   size_t tile_cols   = 0;
-  auto stream        = handle.get_stream();
-  auto device_memory = handle.get_workspace_resource();
+  auto stream        = resource::get_cuda_stream(handle);
+  auto device_memory = resource::get_workspace_resource(handle);
   auto total_mem     = device_memory->get_mem_info(stream).second;
   faiss_select::chooseTileSize(m, n, d, sizeof(ElementType), total_mem, tile_rows, tile_cols);
 
@@ -251,7 +255,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle,
         IndexType* out_indices          = temp_out_indices.data();
 
         auto count = thrust::make_counting_iterator<IndexType>(0);
-        thrust::for_each(handle.get_thrust_policy(),
+        thrust::for_each(resource::get_thrust_policy(handle),
                          count,
                          count + current_query_size * current_k,
                          [=] __device__(IndexType i) {
@@ -308,7 +312,7 @@ template <typename IntType          = int,
           typename value_t          = float,
           typename DistanceEpilogue = raft::identity_op>
 void brute_force_knn_impl(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   std::vector<value_t*>& input,
   std::vector<IntType>& sizes,
   IntType D,
@@ -324,7 +328,7 @@ void brute_force_knn_impl(
   float metricArg                     = 0,
   DistanceEpilogue distance_epilogue  = raft::identity_op())
 {
-  auto userStream = handle.get_stream();
+  auto userStream = resource::get_cuda_stream(handle);
 
   ASSERT(input.size() == sizes.size(), "input and sizes vectors should be the same size");
 
@@ -390,14 +394,14 @@ void brute_force_knn_impl(
   }
 
   // Make other streams from pool wait on main stream
-  handle.wait_stream_pool_on_stream();
+  resource::wait_stream_pool_on_stream(handle);
 
   size_t total_rows_processed = 0;
   for (size_t i = 0; i < input.size(); i++) {
     value_t* out_d_ptr = out_D + (i * k * n);
     IdxType* out_i_ptr = out_I + (i * k * n);
 
-    auto stream = handle.get_next_usable_stream(i);
+    auto stream = resource::get_next_usable_stream(handle, i);
 
     if (k <= 64 && rowMajorQuery == rowMajorIndex && rowMajorQuery == true &&
         std::is_same_v<DistanceEpilogue, raft::identity_op> &&
@@ -442,7 +446,7 @@ void brute_force_knn_impl(
           break;
         default:
           // Create a new handle with the current stream from the stream pool
-          raft::device_resources stream_pool_handle(handle);
+          raft::resources stream_pool_handle(handle);
           raft::resource::set_cuda_stream(stream_pool_handle, stream);
 
           auto index = input[i];
@@ -476,7 +480,7 @@ void brute_force_knn_impl(
   // Sync internal streams if used. We don't need to
   // sync the user stream because we'll already have
   // fully serial execution.
-  handle.sync_stream_pool();
+  resource::sync_stream_pool(handle);
 
   if (input.size() > 1 || translations != nullptr) {
     // This is necessary for proper index translations. If there are
diff --git a/cpp/include/raft/neighbors/detail/refine.cuh b/cpp/include/raft/neighbors/detail/refine.cuh
index 0ff5e4cdbc..64f9511ff9 100644
--- a/cpp/include/raft/neighbors/detail/refine.cuh
+++ b/cpp/include/raft/neighbors/detail/refine.cuh
@@ -17,9 +17,11 @@
 #pragma once
 
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
 #include <raft/core/nvtx.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/detail/select_warpsort.cuh>
 #include <raft/neighbors/detail/ivf_flat_build.cuh>
 #include <raft/neighbors/detail/ivf_flat_interleaved_scan.cuh>
@@ -74,7 +76,7 @@ void check_input(extents_t dataset,
  * See raft::neighbors::refine for docs.
  */
 template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
-void refine_device(raft::device_resources const& handle,
+void refine_device(raft::resources const& handle,
                    raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,
                    raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,
                    raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
@@ -104,10 +106,11 @@ void refine_device(raft::device_resources const& handle,
   // - We consider that the coarse level search is already performed and assigned a single cluster
   //   to search for each query (the cluster formed from the corresponding candidates).
   // - We run IVF flat search with n_probes=1 to select the best k elements of the candidates.
-  rmm::device_uvector<uint32_t> fake_coarse_idx(n_queries, handle.get_stream());
+  rmm::device_uvector<uint32_t> fake_coarse_idx(n_queries, resource::get_cuda_stream(handle));
 
-  thrust::sequence(
-    handle.get_thrust_policy(), fake_coarse_idx.data(), fake_coarse_idx.data() + n_queries);
+  thrust::sequence(resource::get_thrust_policy(handle),
+                   fake_coarse_idx.data(),
+                   fake_coarse_idx.data() + n_queries);
 
   raft::neighbors::ivf_flat::index<data_t, idx_t> refinement_index(
     handle, metric, n_queries, false, true, dim);
@@ -133,7 +136,7 @@ void refine_device(raft::device_resources const& handle,
            indices.data_handle(),
            distances.data_handle(),
            grid_dim_x,
-           handle.get_stream());
+           resource::get_cuda_stream(handle));
 }
 
 /** Helper structure for naive CPU implementation of refine. */
diff --git a/cpp/include/raft/neighbors/epsilon_neighborhood.cuh b/cpp/include/raft/neighbors/epsilon_neighborhood.cuh
index 7db5ef6877..bade4385fb 100644
--- a/cpp/include/raft/neighbors/epsilon_neighborhood.cuh
+++ b/cpp/include/raft/neighbors/epsilon_neighborhood.cuh
@@ -20,7 +20,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/spatial/knn/detail/epsilon_neighborhood.cuh>
 
 namespace raft::neighbors::epsilon_neighborhood {
@@ -72,10 +73,10 @@ void epsUnexpL2SqNeighborhood(bool* adj,
  *
  * @code{.cpp}
  *  #include <raft/neighbors/epsilon_neighborhood.cuh>
- *  #include <raft/core/device_resources.hpp>
+ *  #include <raft/core/resources.hpp>
  *  #include <raft/core/device_mdarray.hpp>
  *  using namespace raft::neighbors;
- *  raft::raft::device_resources handle;
+ *  raft::resources handle;
  *  ...
  *  auto adj = raft::make_device_matrix<bool>(handle, m * n);
  *  auto vd = raft::make_device_vector<int>(handle, m+1);
@@ -97,7 +98,7 @@ void epsUnexpL2SqNeighborhood(bool* adj,
  *                    squared as we compute L2-squared distance in this method)
  */
 template <typename value_t, typename idx_t, typename matrix_idx_t>
-void eps_neighbors_l2sq(raft::device_resources const& handle,
+void eps_neighbors_l2sq(raft::resources const& handle,
                         raft::device_matrix_view<const value_t, matrix_idx_t, row_major> x,
                         raft::device_matrix_view<const value_t, matrix_idx_t, row_major> y,
                         raft::device_matrix_view<bool, matrix_idx_t, row_major> adj,
@@ -112,7 +113,7 @@ void eps_neighbors_l2sq(raft::device_resources const& handle,
                                            y.extent(0),
                                            x.extent(1),
                                            eps,
-                                           handle.get_stream());
+                                           resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group epsilon_neighbors
diff --git a/cpp/include/raft/neighbors/ivf_flat-ext.cuh b/cpp/include/raft/neighbors/ivf_flat-ext.cuh
index 2dfe8dcc78..dff7b6b2ab 100644
--- a/cpp/include/raft/neighbors/ivf_flat-ext.cuh
+++ b/cpp/include/raft/neighbors/ivf_flat-ext.cuh
@@ -19,7 +19,7 @@
 #include <cstdint>                                // int64_t
 
 #include <raft/core/device_mdspan.hpp>            // raft::device_matrix_view
-#include <raft/core/device_resources.hpp>         // raft::device_resources
+#include <raft/core/resources.hpp>                // raft::resources
 #include <raft/neighbors/ivf_flat_serialize.cuh>
 #include <raft/neighbors/ivf_flat_types.hpp>      // raft::neighbors::ivf_flat::index
 #include <raft/util/raft_explicit.hpp>            // RAFT_EXPLICIT
@@ -30,52 +30,52 @@
 namespace raft::neighbors::ivf_flat {
 
 template <typename T, typename IdxT>
-auto build(raft::device_resources const& handle,
+auto build(raft::resources const& handle,
            const index_params& params,
            const T* dataset,
            IdxT n_rows,
            uint32_t dim) -> index<T, IdxT> RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-auto build(raft::device_resources const& handle,
+auto build(raft::resources const& handle,
            const index_params& params,
            raft::device_matrix_view<const T, IdxT, row_major> dataset)
   -> index<T, IdxT> RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void build(raft::device_resources const& handle,
+void build(raft::resources const& handle,
            const index_params& params,
            raft::device_matrix_view<const T, IdxT, row_major> dataset,
            raft::neighbors::ivf_flat::index<T, IdxT>& idx) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             const index<T, IdxT>& orig_index,
             const T* new_vectors,
             const IdxT* new_indices,
             IdxT n_rows) -> index<T, IdxT> RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
             std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
             const index<T, IdxT>& orig_index) -> index<T, IdxT> RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             index<T, IdxT>* index,
             const T* new_vectors,
             const IdxT* new_indices,
             IdxT n_rows) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
             std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
             index<T, IdxT>* index) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const index<T, IdxT>& index,
             const T* queries,
@@ -86,7 +86,7 @@ void search(raft::device_resources const& handle,
             rmm::mr::device_memory_resource* mr = nullptr) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const index<T, IdxT>& index,
             raft::device_matrix_view<const T, IdxT, row_major> queries,
@@ -99,7 +99,7 @@ void search(raft::device_resources const& handle,
 
 #define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)        \
   extern template auto raft::neighbors::ivf_flat::build<T, IdxT>( \
-    raft::device_resources const& handle,                         \
+    raft::resources const& handle,                                \
     const raft::neighbors::ivf_flat::index_params& params,        \
     const T* dataset,                                             \
     IdxT n_rows,                                                  \
@@ -107,13 +107,13 @@ void search(raft::device_resources const& handle,
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \
                                                                   \
   extern template auto raft::neighbors::ivf_flat::build<T, IdxT>( \
-    raft::device_resources const& handle,                         \
+    raft::resources const& handle,                                \
     const raft::neighbors::ivf_flat::index_params& params,        \
     raft::device_matrix_view<const T, IdxT, row_major> dataset)   \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \
                                                                   \
   extern template void raft::neighbors::ivf_flat::build<T, IdxT>( \
-    raft::device_resources const& handle,                         \
+    raft::resources const& handle,                                \
     const raft::neighbors::ivf_flat::index_params& params,        \
     raft::device_matrix_view<const T, IdxT, row_major> dataset,   \
     raft::neighbors::ivf_flat::index<T, IdxT>& idx);
@@ -125,7 +125,7 @@ instantiate_raft_neighbors_ivf_flat_build(uint8_t, int64_t);
 
 #define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
   extern template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
@@ -133,21 +133,21 @@ instantiate_raft_neighbors_ivf_flat_build(uint8_t, int64_t);
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   extern template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   extern template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
     IdxT n_rows);                                                          \
                                                                            \
   extern template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     raft::neighbors::ivf_flat::index<T, IdxT>* index);
@@ -160,7 +160,7 @@ instantiate_raft_neighbors_ivf_flat_extend(uint8_t, int64_t);
 
 #define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)        \
   extern template void raft::neighbors::ivf_flat::search<T, IdxT>( \
-    raft::device_resources const& handle,                          \
+    raft::resources const& handle,                                 \
     const raft::neighbors::ivf_flat::search_params& params,        \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \
     const T* queries,                                              \
@@ -171,7 +171,7 @@ instantiate_raft_neighbors_ivf_flat_extend(uint8_t, int64_t);
     rmm::mr::device_memory_resource* mr);                          \
                                                                    \
   extern template void raft::neighbors::ivf_flat::search<T, IdxT>( \
-    raft::device_resources const& handle,                          \
+    raft::resources const& handle,                                 \
     const raft::neighbors::ivf_flat::search_params& params,        \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \
     raft::device_matrix_view<const T, IdxT, row_major> queries,    \
diff --git a/cpp/include/raft/neighbors/ivf_flat-inl.cuh b/cpp/include/raft/neighbors/ivf_flat-inl.cuh
index 4f8d7f596e..739e012e08 100644
--- a/cpp/include/raft/neighbors/ivf_flat-inl.cuh
+++ b/cpp/include/raft/neighbors/ivf_flat-inl.cuh
@@ -21,7 +21,7 @@
 #include <raft/neighbors/ivf_flat_serialize.cuh>
 #include <raft/neighbors/ivf_flat_types.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <rmm/cuda_stream_view.hpp>
@@ -62,7 +62,7 @@ namespace raft::neighbors::ivf_flat {
  * @return the constructed ivf-flat index
  */
 template <typename T, typename IdxT>
-auto build(raft::device_resources const& handle,
+auto build(raft::resources const& handle,
            const index_params& params,
            const T* dataset,
            IdxT n_rows,
@@ -107,7 +107,7 @@ auto build(raft::device_resources const& handle,
  * @return the constructed ivf-flat index
  */
 template <typename T, typename IdxT>
-auto build(raft::device_resources const& handle,
+auto build(raft::resources const& handle,
            const index_params& params,
            raft::device_matrix_view<const T, IdxT, row_major> dataset) -> index<T, IdxT>
 {
@@ -150,7 +150,7 @@ auto build(raft::device_resources const& handle,
  *
  */
 template <typename T, typename IdxT>
-void build(raft::device_resources const& handle,
+void build(raft::resources const& handle,
            const index_params& params,
            raft::device_matrix_view<const T, IdxT, row_major> dataset,
            raft::neighbors::ivf_flat::index<T, IdxT>& idx)
@@ -197,7 +197,7 @@ void build(raft::device_resources const& handle,
  * @return the constructed extended ivf-flat index
  */
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             const index<T, IdxT>& orig_index,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -245,7 +245,7 @@ auto extend(raft::device_resources const& handle,
  * @return the constructed extended ivf-flat index
  */
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
             std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
             const index<T, IdxT>& orig_index) -> index<T, IdxT>
@@ -286,7 +286,7 @@ auto extend(raft::device_resources const& handle,
  * @param[in] n_rows the number of samples
  */
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             index<T, IdxT>* index,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -327,7 +327,7 @@ void extend(raft::device_resources const& handle,
  * @param[inout] index pointer to index, to be overwritten in-place
  */
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
             std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
             index<T, IdxT>* index)
@@ -384,7 +384,7 @@ void extend(raft::device_resources const& handle,
  * enough memory pool here to avoid memory allocations within search).
  */
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const index<T, IdxT>& index,
             const T* queries,
@@ -436,7 +436,7 @@ void search(raft::device_resources const& handle,
  * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k]
  */
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const index<T, IdxT>& index,
             raft::device_matrix_view<const T, IdxT, row_major> queries,
diff --git a/cpp/include/raft/neighbors/ivf_flat_serialize.cuh b/cpp/include/raft/neighbors/ivf_flat_serialize.cuh
index 77fce13e61..311c31040e 100644
--- a/cpp/include/raft/neighbors/ivf_flat_serialize.cuh
+++ b/cpp/include/raft/neighbors/ivf_flat_serialize.cuh
@@ -31,9 +31,9 @@ namespace raft::neighbors::ivf_flat {
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create an output stream
  * std::ostream os(std::cout.rdbuf());
@@ -50,7 +50,7 @@ namespace raft::neighbors::ivf_flat {
  *
  */
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& handle, std::ostream& os, const index<T, IdxT>& index)
+void serialize(raft::resources const& handle, std::ostream& os, const index<T, IdxT>& index)
 {
   detail::serialize(handle, os, index);
 }
@@ -61,9 +61,9 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create a string with a filepath
  * std::string filename("/path/to/index");
@@ -80,7 +80,7 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
  *
  */
 template <typename T, typename IdxT>
-void serialize(raft::device_resources const& handle,
+void serialize(raft::resources const& handle,
                const std::string& filename,
                const index<T, IdxT>& index)
 {
@@ -93,9 +93,9 @@ void serialize(raft::device_resources const& handle,
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create an input stream
  * std::istream is(std::cin.rdbuf());
@@ -113,7 +113,7 @@ void serialize(raft::device_resources const& handle,
  * @return raft::neighbors::ivf_flat::index<T, IdxT>
  */
 template <typename T, typename IdxT>
-index<T, IdxT> deserialize(raft::device_resources const& handle, std::istream& is)
+index<T, IdxT> deserialize(raft::resources const& handle, std::istream& is)
 {
   return detail::deserialize<T, IdxT>(handle, is);
 }
@@ -124,9 +124,9 @@ index<T, IdxT> deserialize(raft::device_resources const& handle, std::istream& i
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create a string with a filepath
  * std::string filename("/path/to/index");
@@ -144,7 +144,7 @@ index<T, IdxT> deserialize(raft::device_resources const& handle, std::istream& i
  * @return raft::neighbors::ivf_flat::index<T, IdxT>
  */
 template <typename T, typename IdxT>
-index<T, IdxT> deserialize(raft::device_resources const& handle, const std::string& filename)
+index<T, IdxT> deserialize(raft::resources const& handle, const std::string& filename)
 {
   return detail::deserialize<T, IdxT>(handle, filename);
 }
diff --git a/cpp/include/raft/neighbors/ivf_flat_types.hpp b/cpp/include/raft/neighbors/ivf_flat_types.hpp
index c7abe83f8a..ccdc3f28da 100644
--- a/cpp/include/raft/neighbors/ivf_flat_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_flat_types.hpp
@@ -17,12 +17,13 @@
 #pragma once
 
 #include "ann_types.hpp"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/error.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/mdspan_types.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/ivf_list_types.hpp>
 #include <raft/util/integer_utils.hpp>
@@ -236,7 +237,7 @@ struct index : ann::index {
   ~index()                               = default;
 
   /** Construct an empty index. It needs to be trained and then populated. */
-  index(raft::device_resources const& res,
+  index(raft::resources const& res,
         raft::distance::DistanceType metric,
         uint32_t n_lists,
         bool adaptive_centers,
@@ -259,7 +260,7 @@ struct index : ann::index {
   }
 
   /** Construct an empty index. It needs to be trained and then populated. */
-  index(raft::device_resources const& res, const index_params& params, uint32_t dim)
+  index(raft::resources const& res, const index_params& params, uint32_t dim)
     : index(res,
             params.metric,
             params.n_lists,
@@ -297,9 +298,9 @@ struct index : ann::index {
   /**
    * Update the state of the dependent index members.
    */
-  void recompute_internal_state(raft::device_resources const& res)
+  void recompute_internal_state(raft::resources const& res)
   {
-    auto stream = res.get_stream();
+    auto stream = resource::get_cuda_stream(res);
 
     // Actualize the list pointers
     auto this_lists           = lists();
@@ -319,7 +320,7 @@ struct index : ann::index {
     check_consistency();
   }
 
-  void allocate_center_norms(raft::device_resources const& res)
+  void allocate_center_norms(raft::resources const& res)
   {
     switch (metric_) {
       case raft::distance::DistanceType::L2Expanded:
diff --git a/cpp/include/raft/neighbors/ivf_list.hpp b/cpp/include/raft/neighbors/ivf_list.hpp
index a0ba001f77..ad06a3ee71 100644
--- a/cpp/include/raft/neighbors/ivf_list.hpp
+++ b/cpp/include/raft/neighbors/ivf_list.hpp
@@ -16,13 +16,15 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <raft/neighbors/ivf_list_types.hpp>
 
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/error.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/mdspan.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/core/serialize.hpp>
 #include <raft/util/integer_utils.hpp>
 
@@ -38,7 +40,7 @@ namespace raft::neighbors::ivf {
 template <template <typename, typename...> typename SpecT,
           typename SizeT,
           typename... SpecExtraArgs>
-list<SpecT, SizeT, SpecExtraArgs...>::list(raft::device_resources const& res,
+list<SpecT, SizeT, SpecExtraArgs...>::list(raft::resources const& res,
                                            const spec_type& spec,
                                            size_type n_rows)
   : size{n_rows}, data{res}, indices{res}
@@ -61,7 +63,7 @@ list<SpecT, SizeT, SpecExtraArgs...>::list(raft::device_resources const& res,
       e.what());
   }
   // Fill the index buffer with a pre-defined marker for easier debugging
-  thrust::fill_n(res.get_thrust_policy(),
+  thrust::fill_n(resource::get_thrust_policy(res),
                  indices.data_handle(),
                  indices.size(),
                  ivf::kInvalidRecord<index_type>);
@@ -72,7 +74,7 @@ list<SpecT, SizeT, SpecExtraArgs...>::list(raft::device_resources const& res,
  * copy the data if necessary.
  */
 template <typename ListT>
-void resize_list(raft::device_resources const& res,
+void resize_list(raft::resources const& res,
                  std::shared_ptr<ListT>& orig_list,  // NOLINT
                  const typename ListT::spec_type& spec,
                  typename ListT::size_type new_used_size,
@@ -104,18 +106,18 @@ void resize_list(raft::device_resources const& res,
     copy(copied_view.data_handle(),
          orig_list->data.data_handle(),
          copied_view.size(),
-         res.get_stream());
+         resource::get_cuda_stream(res));
     copy(new_list->indices.data_handle(),
          orig_list->indices.data_handle(),
          old_used_size,
-         res.get_stream());
+         resource::get_cuda_stream(res));
   }
   // swap the shared pointer content with the new list
   new_list.swap(orig_list);
 }
 
 template <typename ListT>
-auto serialize_list(const raft::device_resources& handle,
+auto serialize_list(const raft::resources& handle,
                     std::ostream& os,
                     const ListT& ld,
                     const typename ListT::spec_type& store_spec,
@@ -132,15 +134,21 @@ auto serialize_list(const raft::device_resources& handle,
     make_host_mdarray<typename ListT::value_type, size_type, row_major>(data_extents);
   auto inds_array = make_host_mdarray<typename ListT::index_type, size_type, row_major>(
     make_extents<size_type>(size));
-  copy(data_array.data_handle(), ld.data.data_handle(), data_array.size(), handle.get_stream());
-  copy(inds_array.data_handle(), ld.indices.data_handle(), inds_array.size(), handle.get_stream());
-  handle.sync_stream();
+  copy(data_array.data_handle(),
+       ld.data.data_handle(),
+       data_array.size(),
+       resource::get_cuda_stream(handle));
+  copy(inds_array.data_handle(),
+       ld.indices.data_handle(),
+       inds_array.size(),
+       resource::get_cuda_stream(handle));
+  resource::sync_stream(handle);
   serialize_mdspan(handle, os, data_array.view());
   serialize_mdspan(handle, os, inds_array.view());
 }
 
 template <typename ListT>
-auto serialize_list(const raft::device_resources& handle,
+auto serialize_list(const raft::resources& handle,
                     std::ostream& os,
                     const std::shared_ptr<ListT>& ld,
                     const typename ListT::spec_type& store_spec,
@@ -155,7 +163,7 @@ auto serialize_list(const raft::device_resources& handle,
 }
 
 template <typename ListT>
-auto deserialize_list(const raft::device_resources& handle,
+auto deserialize_list(const raft::resources& handle,
                       std::istream& is,
                       std::shared_ptr<ListT>& ld,
                       const typename ListT::spec_type& store_spec,
@@ -172,11 +180,15 @@ auto deserialize_list(const raft::device_resources& handle,
     make_extents<size_type>(size));
   deserialize_mdspan(handle, is, data_array.view());
   deserialize_mdspan(handle, is, inds_array.view());
-  copy(ld->data.data_handle(), data_array.data_handle(), data_array.size(), handle.get_stream());
+  copy(ld->data.data_handle(),
+       data_array.data_handle(),
+       data_array.size(),
+       resource::get_cuda_stream(handle));
   // NB: copying exactly 'size' indices to leave the rest 'kInvalidRecord' intact.
-  copy(ld->indices.data_handle(), inds_array.data_handle(), size, handle.get_stream());
+  copy(
+    ld->indices.data_handle(), inds_array.data_handle(), size, resource::get_cuda_stream(handle));
   // Make sure the data is copied from host to device before the host arrays get out of the scope.
-  handle.sync_stream();
+  resource::sync_stream(handle);
 }
 
 }  // namespace raft::neighbors::ivf
diff --git a/cpp/include/raft/neighbors/ivf_list_types.hpp b/cpp/include/raft/neighbors/ivf_list_types.hpp
index 50a905c6ae..6317825201 100644
--- a/cpp/include/raft/neighbors/ivf_list_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_list_types.hpp
@@ -17,7 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 #include <atomic>
 #include <limits>
@@ -53,7 +53,7 @@ struct list {
   std::atomic<size_type> size;
 
   /** Allocate a new list capable of holding at least `n_rows` data records and indices. */
-  list(raft::device_resources const& res, const spec_type& spec, size_type n_rows);
+  list(raft::resources const& res, const spec_type& spec, size_type n_rows);
 };
 
 template <typename ListT, class T = void>
diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
index 4b9b0673d4..42dc776c97 100644
--- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
@@ -19,7 +19,7 @@
 #include <cstdint>                                // int64_t
 
 #include <raft/core/device_mdspan.hpp>            // raft::device_matrix_view
-#include <raft/core/device_resources.hpp>         // raft::device_resources
+#include <raft/core/resources.hpp>                // raft::resources
 #include <raft/neighbors/ivf_pq_types.hpp>        // raft::neighbors::ivf_pq::index
 #include <raft/util/raft_explicit.hpp>            // RAFT_EXPLICIT
 #include <rmm/mr/device/per_device_resource.hpp>  // rmm::mr::device_memory_resource
@@ -29,24 +29,24 @@
 namespace raft::neighbors::ivf_pq {
 
 template <typename T, typename IdxT = uint32_t>
-index<IdxT> build(raft::device_resources const& handle,
+index<IdxT> build(raft::resources const& handle,
                   const index_params& params,
                   raft::device_matrix_view<const T, IdxT, row_major> dataset) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-index<IdxT> extend(raft::device_resources const& handle,
+index<IdxT> extend(raft::resources const& handle,
                    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
                    std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,
                    const index<IdxT>& idx) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
             std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,
             index<IdxT>* idx) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const index<IdxT>& idx,
             raft::device_matrix_view<const T, IdxT, row_major> queries,
@@ -54,28 +54,28 @@ void search(raft::device_resources const& handle,
             raft::device_matrix_view<float, IdxT, row_major> distances) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT = uint32_t>
-auto build(raft::device_resources const& handle,
+auto build(raft::resources const& handle,
            const index_params& params,
            const T* dataset,
            IdxT n_rows,
            uint32_t dim) -> index<IdxT> RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             const index<IdxT>& idx,
             const T* new_vectors,
             const IdxT* new_indices,
             IdxT n_rows) -> index<IdxT> RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             index<IdxT>* idx,
             const T* new_vectors,
             const IdxT* new_indices,
             IdxT n_rows) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const raft::neighbors::ivf_pq::search_params& params,
             const index<IdxT>& idx,
             const T* queries,
@@ -91,12 +91,12 @@ void search(raft::device_resources const& handle,
 
 #define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                        \
   extern template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
-    raft::device_resources const& handle,                                                       \
+    raft::resources const& handle,                                                              \
     const raft::neighbors::ivf_pq::index_params& params,                                        \
     raft::device_matrix_view<const T, IdxT, row_major> dataset);                                \
                                                                                                 \
   extern template auto raft::neighbors::ivf_pq::build(                                          \
-    raft::device_resources const& handle,                                                       \
+    raft::resources const& handle,                                                              \
     const raft::neighbors::ivf_pq::index_params& params,                                        \
     const T* dataset,                                                                           \
     IdxT n_rows,                                                                                \
@@ -111,19 +111,19 @@ instantiate_raft_neighbors_ivf_pq_build(uint8_t, int64_t);
 
 #define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                        \
   extern template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
-    raft::device_resources const& handle,                                                        \
+    raft::resources const& handle,                                                               \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                              \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,            \
     const raft::neighbors::ivf_pq::index<IdxT>& idx);                                            \
                                                                                                  \
   extern template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                        \
+    raft::resources const& handle,                                                               \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                              \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,            \
     raft::neighbors::ivf_pq::index<IdxT>* idx);                                                  \
                                                                                                  \
   extern template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                        \
+    raft::resources const& handle,                                                               \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,                                             \
     const T* new_vectors,                                                                        \
     const IdxT* new_indices,                                                                     \
@@ -131,7 +131,7 @@ instantiate_raft_neighbors_ivf_pq_build(uint8_t, int64_t);
     ->raft::neighbors::ivf_pq::index<IdxT>;                                                      \
                                                                                                  \
   extern template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                        \
+    raft::resources const& handle,                                                               \
     raft::neighbors::ivf_pq::index<IdxT>* idx,                                                   \
     const T* new_vectors,                                                                        \
     const IdxT* new_indices,                                                                     \
@@ -145,7 +145,7 @@ instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t);
 
 #define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
   extern template void raft::neighbors::ivf_pq::search<T, IdxT>( \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     raft::device_matrix_view<const T, IdxT, row_major> queries,  \
@@ -153,7 +153,7 @@ instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t);
     raft::device_matrix_view<float, IdxT, row_major> distances); \
                                                                  \
   extern template void raft::neighbors::ivf_pq::search<T, IdxT>( \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     const T* queries,                                            \
diff --git a/cpp/include/raft/neighbors/ivf_pq-inl.cuh b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
index dfc24e8214..83e7931c78 100644
--- a/cpp/include/raft/neighbors/ivf_pq-inl.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
@@ -16,13 +16,14 @@
 
 #pragma once
 
+#include <raft/core/resource/device_memory_resource.hpp>
 #include <raft/neighbors/detail/ivf_pq_build.cuh>
 #include <raft/neighbors/detail/ivf_pq_search.cuh>
 #include <raft/neighbors/ivf_pq_serialize.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
@@ -52,7 +53,7 @@ namespace raft::neighbors::ivf_pq {
  * @return the constructed ivf-pq index
  */
 template <typename T, typename IdxT = uint32_t>
-index<IdxT> build(raft::device_resources const& handle,
+index<IdxT> build(raft::resources const& handle,
                   const index_params& params,
                   raft::device_matrix_view<const T, IdxT, row_major> dataset)
 {
@@ -75,7 +76,7 @@ index<IdxT> build(raft::device_resources const& handle,
  * @param[inout] idx
  */
 template <typename T, typename IdxT>
-index<IdxT> extend(raft::device_resources const& handle,
+index<IdxT> extend(raft::resources const& handle,
                    raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
                    std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
                    const index<IdxT>& idx)
@@ -110,7 +111,7 @@ index<IdxT> extend(raft::device_resources const& handle,
  * @param[inout] idx
  */
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             raft::device_matrix_view<const T, IdxT, row_major> new_vectors,
             std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,
             index<IdxT>* idx)
@@ -157,7 +158,7 @@ void extend(raft::device_resources const& handle,
  * k]
  */
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const index<IdxT>& idx,
             raft::device_matrix_view<const T, IdxT, row_major> queries,
@@ -183,7 +184,7 @@ void search(raft::device_resources const& handle,
                         k,
                         neighbors.data_handle(),
                         distances.data_handle(),
-                        handle.get_workspace_resource());
+                        resource::get_workspace_resource(handle));
 }
 
 /** @} */  // end group ivf_pq
@@ -221,7 +222,7 @@ void search(raft::device_resources const& handle,
  * @return the constructed ivf-pq index
  */
 template <typename T, typename IdxT = uint32_t>
-auto build(raft::device_resources const& handle,
+auto build(raft::resources const& handle,
            const index_params& params,
            const T* dataset,
            IdxT n_rows,
@@ -263,7 +264,7 @@ auto build(raft::device_resources const& handle,
  * @return the constructed extended ivf-pq index
  */
 template <typename T, typename IdxT>
-auto extend(raft::device_resources const& handle,
+auto extend(raft::resources const& handle,
             const index<IdxT>& idx,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -287,7 +288,7 @@ auto extend(raft::device_resources const& handle,
  * @param[in] n_rows the number of samples
  */
 template <typename T, typename IdxT>
-void extend(raft::device_resources const& handle,
+void extend(raft::resources const& handle,
             index<IdxT>* idx,
             const T* new_vectors,
             const IdxT* new_indices,
@@ -339,7 +340,7 @@ void extend(raft::device_resources const& handle,
  * enough memory pool here to avoid memory allocations within search).
  */
 template <typename T, typename IdxT>
-void search(raft::device_resources const& handle,
+void search(raft::resources const& handle,
             const search_params& params,
             const index<IdxT>& idx,
             const T* queries,
diff --git a/cpp/include/raft/neighbors/ivf_pq_helpers.cuh b/cpp/include/raft/neighbors/ivf_pq_helpers.cuh
index 398bd545f1..f00107f629 100644
--- a/cpp/include/raft/neighbors/ivf_pq_helpers.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq_helpers.cuh
@@ -16,11 +16,12 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/neighbors/detail/ivf_pq_build.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::neighbors::ivf_pq::helpers {
 /**
@@ -60,13 +61,14 @@ namespace codepacker {
  *   it must be smaller than the list size.
  */
 inline void unpack(
-  raft::device_resources const& res,
+  raft::resources const& res,
   device_mdspan<const uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data,
   uint32_t pq_bits,
   uint32_t offset,
   device_matrix_view<uint8_t, uint32_t, row_major> codes)
 {
-  ivf_pq::detail::unpack_list_data(codes, list_data, offset, pq_bits, res.get_stream());
+  ivf_pq::detail::unpack_list_data(
+    codes, list_data, offset, pq_bits, resource::get_cuda_stream(res));
 }
 
 /**
@@ -92,13 +94,13 @@ inline void unpack(
  * @param[in] list_data block to write into
  */
 inline void pack(
-  raft::device_resources const& res,
+  raft::resources const& res,
   device_matrix_view<const uint8_t, uint32_t, row_major> codes,
   uint32_t pq_bits,
   uint32_t offset,
   device_mdspan<uint8_t, list_spec<uint32_t, uint32_t>::list_extents, row_major> list_data)
 {
-  ivf_pq::detail::pack_list_data(list_data, codes, offset, pq_bits, res.get_stream());
+  ivf_pq::detail::pack_list_data(list_data, codes, offset, pq_bits, resource::get_cuda_stream(res));
 }
 }  // namespace codepacker
 
@@ -127,7 +129,7 @@ inline void pack(
  * @param[in] offset how many records to skip before writing the data into the list
  */
 template <typename IdxT>
-void pack_list_data(raft::device_resources const& res,
+void pack_list_data(raft::resources const& res,
                     index<IdxT>* index,
                     device_matrix_view<const uint8_t, uint32_t, row_major> codes,
                     uint32_t label,
@@ -146,8 +148,9 @@ void pack_list_data(raft::device_resources const& res,
  *   uint32_t label = 3;
  *   // Get the list size
  *   uint32_t list_size = 0;
- *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, res.get_stream());
- *   res.sync_stream();
+ *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1,
+ *              resource::get_cuda_stream(res));
+ *   resource::sync_stream(res);
  *   // allocate the buffer for the output
  *   auto codes = raft::make_device_matrix<float>(res, list_size, index.pq_dim());
  *   // unpack the whole list
@@ -168,7 +170,7 @@ void pack_list_data(raft::device_resources const& res,
  *   How many records in the list to skip.
  */
 template <typename IdxT>
-void unpack_list_data(raft::device_resources const& res,
+void unpack_list_data(raft::resources const& res,
                       const index<IdxT>& index,
                       device_matrix_view<uint8_t, uint32_t, row_major> out_codes,
                       uint32_t label,
@@ -188,7 +190,7 @@ void unpack_list_data(raft::device_resources const& res,
  *   // Create the selection vector
  *   auto selected_indices = raft::make_device_vector<uint32_t>(res, 4);
  *   ... fill the indices ...
- *   res.sync_stream();
+ *   resource::sync_stream(res);
  *   // allocate the buffer for the output
  *   auto codes = raft::make_device_matrix<float>(res, selected_indices.size(), index.pq_dim());
  *   // decode the whole list
@@ -210,7 +212,7 @@ void unpack_list_data(raft::device_resources const& res,
  *   The id of the list (cluster) to decode.
  */
 template <typename IdxT>
-void unpack_list_data(raft::device_resources const& res,
+void unpack_list_data(raft::resources const& res,
                       const index<IdxT>& index,
                       device_vector_view<const uint32_t> in_cluster_indices,
                       device_matrix_view<uint8_t, uint32_t, row_major> out_codes,
@@ -229,8 +231,9 @@ void unpack_list_data(raft::device_resources const& res,
  *   uint32_t label = 3;
  *   // Get the list size
  *   uint32_t list_size = 0;
- *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, res.get_stream());
- *   res.sync_stream();
+ *   raft::copy(&list_size, index.list_sizes().data_handle() + label, 1,
+ *              resource::get_cuda_stream(res));
+ *   resource::sync_stream(res);
  *   // allocate the buffer for the output
  *   auto decoded_vectors = raft::make_device_matrix<float>(res, list_size, index.dim());
  *   // decode the whole list
@@ -252,7 +254,7 @@ void unpack_list_data(raft::device_resources const& res,
  *   How many records in the list to skip.
  */
 template <typename T, typename IdxT>
-void reconstruct_list_data(raft::device_resources const& res,
+void reconstruct_list_data(raft::resources const& res,
                            const index<IdxT>& index,
                            device_matrix_view<T, uint32_t, row_major> out_vectors,
                            uint32_t label,
@@ -272,7 +274,7 @@ void reconstruct_list_data(raft::device_resources const& res,
  *   // Create the selection vector
  *   auto selected_indices = raft::make_device_vector<uint32_t>(res, 4);
  *   ... fill the indices ...
- *   res.sync_stream();
+ *   resource::sync_stream(res);
  *   // allocate the buffer for the output
  *   auto decoded_vectors = raft::make_device_matrix<float>(
  *                             res, selected_indices.size(), index.dim());
@@ -296,7 +298,7 @@ void reconstruct_list_data(raft::device_resources const& res,
  *   The id of the list (cluster) to decode.
  */
 template <typename T, typename IdxT>
-void reconstruct_list_data(raft::device_resources const& res,
+void reconstruct_list_data(raft::resources const& res,
                            const index<IdxT>& index,
                            device_vector_view<const uint32_t> in_cluster_indices,
                            device_matrix_view<T, uint32_t, row_major> out_vectors,
@@ -335,7 +337,7 @@ void reconstruct_list_data(raft::device_resources const& res,
  * @param[in] label the id of the target list (cluster).
  */
 template <typename IdxT>
-void extend_list_with_codes(raft::device_resources const& res,
+void extend_list_with_codes(raft::resources const& res,
                             index<IdxT>* index,
                             device_matrix_view<const uint8_t, uint32_t, row_major> new_codes,
                             device_vector_view<const IdxT, uint32_t, row_major> new_indices,
@@ -376,7 +378,7 @@ void extend_list_with_codes(raft::device_resources const& res,
  *
  */
 template <typename T, typename IdxT>
-void extend_list(raft::device_resources const& res,
+void extend_list(raft::resources const& res,
                  index<IdxT>* index,
                  device_matrix_view<const T, uint32_t, row_major> new_vectors,
                  device_vector_view<const IdxT, uint32_t, row_major> new_indices,
@@ -400,7 +402,7 @@ void extend_list(raft::device_resources const& res,
  * @param[in] label the id of the target list (cluster).
  */
 template <typename IdxT>
-void erase_list(raft::device_resources const& res, index<IdxT>* index, uint32_t label)
+void erase_list(raft::resources const& res, index<IdxT>* index, uint32_t label)
 {
   ivf_pq::detail::erase_list(res, index, label);
 }
diff --git a/cpp/include/raft/neighbors/ivf_pq_serialize.cuh b/cpp/include/raft/neighbors/ivf_pq_serialize.cuh
index 2dd9d39d73..f8f92a418c 100644
--- a/cpp/include/raft/neighbors/ivf_pq_serialize.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq_serialize.cuh
@@ -31,9 +31,9 @@ namespace raft::neighbors::ivf_pq {
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create an output stream
  * std::ostream os(std::cout.rdbuf());
@@ -49,7 +49,7 @@ namespace raft::neighbors::ivf_pq {
  *
  */
 template <typename IdxT>
-void serialize(raft::device_resources const& handle, std::ostream& os, const index<IdxT>& index)
+void serialize(raft::resources const& handle, std::ostream& os, const index<IdxT>& index)
 {
   detail::serialize(handle, os, index);
 }
@@ -60,9 +60,9 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create a string with a filepath
  * std::string filename("/path/to/index");
@@ -78,9 +78,7 @@ void serialize(raft::device_resources const& handle, std::ostream& os, const ind
  *
  */
 template <typename IdxT>
-void serialize(raft::device_resources const& handle,
-               const std::string& filename,
-               const index<IdxT>& index)
+void serialize(raft::resources const& handle, const std::string& filename, const index<IdxT>& index)
 {
   detail::serialize(handle, filename, index);
 }
@@ -91,9 +89,9 @@ void serialize(raft::device_resources const& handle,
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create an input stream
  * std::istream is(std::cin.rdbuf());
@@ -109,7 +107,7 @@ void serialize(raft::device_resources const& handle,
  * @return raft::neighbors::ivf_pq::index<IdxT>
  */
 template <typename IdxT>
-index<IdxT> deserialize(raft::device_resources const& handle, std::istream& is)
+index<IdxT> deserialize(raft::resources const& handle, std::istream& is)
 {
   return detail::deserialize<IdxT>(handle, is);
 }
@@ -120,9 +118,9 @@ index<IdxT> deserialize(raft::device_resources const& handle, std::istream& is)
  * Experimental, both the API and the serialization format are subject to change.
  *
  * @code{.cpp}
- * #include <raft/core/device_resources.hpp>
+ * #include <raft/core/resources.hpp>
  *
- * raft::device_resources handle;
+ * raft::resources handle;
  *
  * // create a string with a filepath
  * std::string filename("/path/to/index");
@@ -138,7 +136,7 @@ index<IdxT> deserialize(raft::device_resources const& handle, std::istream& is)
  * @return raft::neighbors::ivf_pq::index<IdxT>
  */
 template <typename IdxT>
-index<IdxT> deserialize(raft::device_resources const& handle, const std::string& filename)
+index<IdxT> deserialize(raft::resources const& handle, const std::string& filename)
 {
   return detail::deserialize<IdxT>(handle, filename);
 }
diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp
index 4d11bac42e..0899a60d88 100644
--- a/cpp/include/raft/neighbors/ivf_pq_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp
@@ -20,10 +20,10 @@
 #include <raft/neighbors/ivf_list_types.hpp>
 
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/error.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/mdspan_types.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/util/integer_utils.hpp>
 
@@ -332,7 +332,7 @@ struct index : ann::index {
   ~index()                               = default;
 
   /** Construct an empty index. It needs to be trained and then populated. */
-  index(raft::device_resources const& handle,
+  index(raft::resources const& handle,
         raft::distance::DistanceType metric,
         codebook_gen codebook_kind,
         uint32_t n_lists,
@@ -362,7 +362,7 @@ struct index : ann::index {
   }
 
   /** Construct an empty index. It needs to be trained and then populated. */
-  index(raft::device_resources const& handle, const index_params& params, uint32_t dim)
+  index(raft::resources const& handle, const index_params& params, uint32_t dim)
     : index(handle,
             params.metric,
             params.codebook_kind,
diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh
index 0ba2d2c5ab..c1fd4676dc 100644
--- a/cpp/include/raft/neighbors/refine-ext.cuh
+++ b/cpp/include/raft/neighbors/refine-ext.cuh
@@ -19,8 +19,8 @@
 #include <cstdint>                           // int64_t
 
 #include <raft/core/device_mdspan.hpp>       // raft::device_matrix_view
-#include <raft/core/device_resources.hpp>    // raft::device_resources
 #include <raft/core/host_mdspan.hpp>         // // raft::host_matrix_view
+#include <raft/core/resources.hpp>           // raft::resources
 #include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
 #include <raft/util/raft_explicit.hpp>       // RAFT_EXPLICIT
 
@@ -29,7 +29,7 @@
 namespace raft::neighbors {
 
 template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,
             raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,
             raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
@@ -39,7 +39,7 @@ void refine(raft::device_resources const& handle,
   RAFT_EXPLICIT;
 
 template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,
             raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,
             raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
@@ -54,7 +54,7 @@ void refine(raft::device_resources const& handle,
 
 #define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)       \
   extern template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>( \
-    raft::device_resources const& handle,                                              \
+    raft::resources const& handle,                                                     \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,             \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,             \
     raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,  \
@@ -63,7 +63,7 @@ void refine(raft::device_resources const& handle,
     raft::distance::DistanceType metric);                                              \
                                                                                        \
   extern template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>( \
-    raft::device_resources const& handle,                                              \
+    raft::resources const& handle,                                                     \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,               \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,               \
     raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,    \
diff --git a/cpp/include/raft/neighbors/refine-inl.cuh b/cpp/include/raft/neighbors/refine-inl.cuh
index 2c4dfb422e..3ef86c1ba7 100644
--- a/cpp/include/raft/neighbors/refine-inl.cuh
+++ b/cpp/include/raft/neighbors/refine-inl.cuh
@@ -17,8 +17,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/neighbors/detail/refine.cuh>
 #include <raft/spatial/knn/detail/ann_utils.cuh>
 
@@ -67,7 +67,7 @@ namespace raft::neighbors {
  * @param[in] metric distance metric to use. Euclidean (L2) is used by default
  */
 template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,
             raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,
             raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
@@ -89,7 +89,7 @@ void refine(raft::device_resources const& handle,
  * @param[in] metric distance metric to use. Euclidean (L2) is used by default
  */
 template <typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,
             raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,
             raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,
diff --git a/cpp/include/raft/neighbors/specializations/detail/ball_cover_lowdim.hpp b/cpp/include/raft/neighbors/specializations/detail/ball_cover_lowdim.hpp
index c558ab8b56..fa2689e960 100644
--- a/cpp/include/raft/neighbors/specializations/detail/ball_cover_lowdim.hpp
+++ b/cpp/include/raft/neighbors/specializations/detail/ball_cover_lowdim.hpp
@@ -24,7 +24,7 @@ namespace knn {
 namespace detail {
 
 extern template void rbc_low_dim_pass_one<std::int64_t, float, std::uint32_t, 2>(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
   const float* query,
   const std::uint32_t n_query_rows,
@@ -38,7 +38,7 @@ extern template void rbc_low_dim_pass_one<std::int64_t, float, std::uint32_t, 2>
   std::uint32_t* dists_counter);
 
 extern template void rbc_low_dim_pass_two<std::int64_t, float, std::uint32_t, 2>(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
   const float* query,
   const std::uint32_t n_query_rows,
@@ -52,7 +52,7 @@ extern template void rbc_low_dim_pass_two<std::int64_t, float, std::uint32_t, 2>
   std::uint32_t* post_dists_counter);
 
 extern template void rbc_low_dim_pass_one<std::int64_t, float, std::uint32_t, 3>(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
   const float* query,
   const std::uint32_t n_query_rows,
@@ -66,7 +66,7 @@ extern template void rbc_low_dim_pass_one<std::int64_t, float, std::uint32_t, 3>
   std::uint32_t* dists_counter);
 
 extern template void rbc_low_dim_pass_two<std::int64_t, float, std::uint32_t, 3>(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const BallCoverIndex<std::int64_t, float, std::uint32_t>& index,
   const float* query,
   const std::uint32_t n_query_rows,
diff --git a/cpp/include/raft/solver/detail/lap_functions.cuh b/cpp/include/raft/solver/detail/lap_functions.cuh
index 63f27e6346..30a4961abf 100644
--- a/cpp/include/raft/solver/detail/lap_functions.cuh
+++ b/cpp/include/raft/solver/detail/lap_functions.cuh
@@ -24,9 +24,10 @@
  */
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/solver/linear_assignment_types.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/solver/detail/lap_kernels.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_scalar.hpp>
@@ -98,7 +99,7 @@ inline void calculateRectangularDims(
 }
 
 template <typename vertex_t, typename weight_t>
-inline void initialReduction(raft::device_resources const& handle,
+inline void initialReduction(raft::resources const& handle,
                              weight_t const* d_costs,
                              Vertices<vertex_t, weight_t>& d_vertices_dev,
                              int SP,
@@ -110,22 +111,25 @@ inline void initialReduction(raft::device_resources const& handle,
 
   detail::calculateRectangularDims(blocks_per_grid, threads_per_block, total_blocks, N, SP);
 
-  kernel_rowReduction<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  kernel_rowReduction<<<blocks_per_grid, threads_per_block, 0, resource::get_cuda_stream(handle)>>>(
     d_costs, d_vertices_dev.row_duals, SP, N, std::numeric_limits<weight_t>::max());
 
-  RAFT_CHECK_CUDA(handle.get_stream());
-  kernel_columnReduction<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
+  kernel_columnReduction<<<blocks_per_grid,
+                           threads_per_block,
+                           0,
+                           resource::get_cuda_stream(handle)>>>(
     d_costs,
     d_vertices_dev.row_duals,
     d_vertices_dev.col_duals,
     SP,
     N,
     std::numeric_limits<weight_t>::max());
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 }
 
 template <typename vertex_t, typename weight_t>
-inline void computeInitialAssignments(raft::device_resources const& handle,
+inline void computeInitialAssignments(raft::resources const& handle,
                                       weight_t const* d_costs,
                                       Vertices<vertex_t, weight_t>& d_vertices,
                                       int SP,
@@ -138,8 +142,8 @@ inline void computeInitialAssignments(raft::device_resources const& handle,
 
   std::size_t size = SP * N;
 
-  rmm::device_uvector<int> row_lock_v(size, handle.get_stream());
-  rmm::device_uvector<int> col_lock_v(size, handle.get_stream());
+  rmm::device_uvector<int> row_lock_v(size, resource::get_cuda_stream(handle));
+  rmm::device_uvector<int> col_lock_v(size, resource::get_cuda_stream(handle));
 
   thrust::fill_n(thrust::device, d_vertices.row_assignments, size, -1);
   thrust::fill_n(thrust::device, d_vertices.col_assignments, size, -1);
@@ -148,7 +152,10 @@ inline void computeInitialAssignments(raft::device_resources const& handle,
 
   detail::calculateRectangularDims(blocks_per_grid, threads_per_block, total_blocks, N, SP);
 
-  kernel_computeInitialAssignments<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  kernel_computeInitialAssignments<<<blocks_per_grid,
+                                     threads_per_block,
+                                     0,
+                                     resource::get_cuda_stream(handle)>>>(
     d_costs,
     d_vertices.row_duals,
     d_vertices.col_duals,
@@ -159,12 +166,12 @@ inline void computeInitialAssignments(raft::device_resources const& handle,
     SP,
     N,
     epsilon);
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 }
 
 // Function for finding row cover on individual devices.
 template <typename vertex_t, typename weight_t>
-inline int computeRowCovers(raft::device_resources const& handle,
+inline int computeRowCovers(raft::resources const& handle,
                             Vertices<vertex_t, weight_t>& d_vertices,
                             VertexData<vertex_t>& d_row_data,
                             VertexData<vertex_t>& d_col_data,
@@ -188,17 +195,20 @@ inline int computeRowCovers(raft::device_resources const& handle,
   thrust::fill_n(thrust::device, d_col_data.children, size, vertex_t{-1});
 
   detail::calculateRectangularDims(blocks_per_grid, threads_per_block, total_blocks, N, SP);
-  kernel_computeRowCovers<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  kernel_computeRowCovers<<<blocks_per_grid,
+                            threads_per_block,
+                            0,
+                            resource::get_cuda_stream(handle)>>>(
     d_vertices.row_assignments, d_vertices.row_covers, d_row_data.is_visited, SP, N);
 
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 
   return thrust::reduce(thrust::device, d_vertices.row_covers, d_vertices.row_covers + size);
 }
 
 // Function for covering the zeros in uncovered rows and expanding the frontier.
 template <typename vertex_t, typename weight_t>
-inline void coverZeroAndExpand(raft::device_resources const& handle,
+inline void coverZeroAndExpand(raft::resources const& handle,
                                weight_t const* d_costs_dev,
                                vertex_t const* d_rows_csr_neighbors,
                                vertex_t const* d_rows_csr_ptrs,
@@ -216,21 +226,23 @@ inline void coverZeroAndExpand(raft::device_resources const& handle,
 
   detail::calculateRectangularDims(blocks_per_grid, threads_per_block, total_blocks, N, SP);
 
-  kernel_coverAndExpand<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
-    d_flag,
-    d_rows_csr_ptrs,
-    d_rows_csr_neighbors,
-    d_costs_dev,
-    d_vertices_dev,
-    d_row_data_dev,
-    d_col_data_dev,
-    SP,
-    N,
-    epsilon);
+  kernel_coverAndExpand<<<blocks_per_grid,
+                          threads_per_block,
+                          0,
+                          resource::get_cuda_stream(handle)>>>(d_flag,
+                                                               d_rows_csr_ptrs,
+                                                               d_rows_csr_neighbors,
+                                                               d_costs_dev,
+                                                               d_vertices_dev,
+                                                               d_row_data_dev,
+                                                               d_col_data_dev,
+                                                               SP,
+                                                               N,
+                                                               epsilon);
 }
 
 template <typename vertex_t, typename weight_t>
-inline vertex_t zeroCoverIteration(raft::device_resources const& handle,
+inline vertex_t zeroCoverIteration(raft::resources const& handle,
                                    weight_t const* d_costs_dev,
                                    Vertices<vertex_t, weight_t>& d_vertices_dev,
                                    VertexData<vertex_t>& d_row_data_dev,
@@ -242,21 +254,21 @@ inline vertex_t zeroCoverIteration(raft::device_resources const& handle,
 {
   vertex_t M;
 
-  rmm::device_uvector<vertex_t> csr_ptrs_v(0, handle.get_stream());
-  rmm::device_uvector<vertex_t> csr_neighbors_v(0, handle.get_stream());
+  rmm::device_uvector<vertex_t> csr_ptrs_v(0, resource::get_cuda_stream(handle));
+  rmm::device_uvector<vertex_t> csr_neighbors_v(0, resource::get_cuda_stream(handle));
 
   {
     dim3 blocks_per_grid;
     dim3 threads_per_block;
     int total_blocks = 0;
 
-    rmm::device_uvector<bool> predicates_v(SP * N, handle.get_stream());
-    rmm::device_uvector<vertex_t> addresses_v(SP * N, handle.get_stream());
+    rmm::device_uvector<bool> predicates_v(SP * N, resource::get_cuda_stream(handle));
+    rmm::device_uvector<vertex_t> addresses_v(SP * N, resource::get_cuda_stream(handle));
 
     thrust::fill_n(thrust::device, predicates_v.data(), SP * N, false);
     thrust::fill_n(thrust::device, addresses_v.data(), SP * N, vertex_t{0});
 
-    csr_ptrs_v.resize(SP + 1, handle.get_stream());
+    csr_ptrs_v.resize(SP + 1, resource::get_cuda_stream(handle));
 
     thrust::fill_n(thrust::device, csr_ptrs_v.data(), (SP + 1), vertex_t{-1});
 
@@ -266,27 +278,29 @@ inline vertex_t zeroCoverIteration(raft::device_resources const& handle,
     kernel_rowPredicateConstructionCSR<<<blocks_per_grid,
                                          threads_per_block,
                                          0,
-                                         handle.get_stream()>>>(
+                                         resource::get_cuda_stream(handle)>>>(
       predicates_v.data(), addresses_v.data(), d_row_data_dev.is_visited, SP, N);
-    RAFT_CHECK_CUDA(handle.get_stream());
+    RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 
     M = thrust::reduce(thrust::device, addresses_v.begin(), addresses_v.end());
     thrust::exclusive_scan(
       thrust::device, addresses_v.begin(), addresses_v.end(), addresses_v.begin());
 
     if (M > 0) {
-      csr_neighbors_v.resize(M, handle.get_stream());
-
-      kernel_rowScatterCSR<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
-        predicates_v.data(),
-        addresses_v.data(),
-        csr_neighbors_v.data(),
-        csr_ptrs_v.data(),
-        M,
-        SP,
-        N);
-
-      RAFT_CHECK_CUDA(handle.get_stream());
+      csr_neighbors_v.resize(M, resource::get_cuda_stream(handle));
+
+      kernel_rowScatterCSR<<<blocks_per_grid,
+                             threads_per_block,
+                             0,
+                             resource::get_cuda_stream(handle)>>>(predicates_v.data(),
+                                                                  addresses_v.data(),
+                                                                  csr_neighbors_v.data(),
+                                                                  csr_ptrs_v.data(),
+                                                                  M,
+                                                                  SP,
+                                                                  N);
+
+      RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
     }
   }
 
@@ -310,7 +324,7 @@ inline vertex_t zeroCoverIteration(raft::device_resources const& handle,
 // Function for executing recursive zero cover. Returns the next step (Step 4 or Step 5) depending
 // on the presence of uncovered zeros.
 template <typename vertex_t, typename weight_t>
-inline void executeZeroCover(raft::device_resources const& handle,
+inline void executeZeroCover(raft::resources const& handle,
                              weight_t const* d_costs_dev,
                              Vertices<vertex_t, weight_t>& d_vertices_dev,
                              VertexData<vertex_t>& d_row_data_dev,
@@ -329,7 +343,7 @@ inline void executeZeroCover(raft::device_resources const& handle,
 
 // Function for executing reverse pass of the maximum matching.
 template <typename vertex_t>
-inline void reversePass(raft::device_resources const& handle,
+inline void reversePass(raft::resources const& handle,
                         VertexData<vertex_t>& d_row_data_dev,
                         VertexData<vertex_t>& d_col_data_dev,
                         int SP,
@@ -343,8 +357,8 @@ inline void reversePass(raft::device_resources const& handle,
 
   detail::calculateLinearDims(blocks_per_grid, threads_per_block, total_blocks, size);
 
-  rmm::device_uvector<bool> predicates_v(size, handle.get_stream());
-  rmm::device_uvector<vertex_t> addresses_v(size, handle.get_stream());
+  rmm::device_uvector<bool> predicates_v(size, resource::get_cuda_stream(handle));
+  rmm::device_uvector<vertex_t> addresses_v(size, resource::get_cuda_stream(handle));
 
   thrust::fill_n(thrust::device, predicates_v.data(), size, false);
   thrust::fill_n(thrust::device, addresses_v.data(), size, vertex_t{0});
@@ -353,10 +367,10 @@ inline void reversePass(raft::device_resources const& handle,
   kernel_augmentPredicateConstruction<<<blocks_per_grid,
                                         threads_per_block,
                                         0,
-                                        handle.get_stream()>>>(
+                                        resource::get_cuda_stream(handle)>>>(
     predicates_v.data(), addresses_v.data(), d_col_data_dev.is_visited, size);
 
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 
   // calculate total number of vertices.
   std::size_t csr_size = thrust::reduce(thrust::device, addresses_v.begin(), addresses_v.end());
@@ -370,22 +384,28 @@ inline void reversePass(raft::device_resources const& handle,
     dim3 threads_per_block_1;
     detail::calculateLinearDims(blocks_per_grid_1, threads_per_block_1, total_blocks_1, csr_size);
 
-    rmm::device_uvector<vertex_t> elements_v(csr_size, handle.get_stream());
+    rmm::device_uvector<vertex_t> elements_v(csr_size, resource::get_cuda_stream(handle));
 
-    kernel_augmentScatter<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+    kernel_augmentScatter<<<blocks_per_grid,
+                            threads_per_block,
+                            0,
+                            resource::get_cuda_stream(handle)>>>(
       elements_v.data(), predicates_v.data(), addresses_v.data(), size);
 
-    RAFT_CHECK_CUDA(handle.get_stream());
+    RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 
-    kernel_reverseTraversal<<<blocks_per_grid_1, threads_per_block_1, 0, handle.get_stream()>>>(
+    kernel_reverseTraversal<<<blocks_per_grid_1,
+                              threads_per_block_1,
+                              0,
+                              resource::get_cuda_stream(handle)>>>(
       elements_v.data(), d_row_data_dev, d_col_data_dev, csr_size);
-    RAFT_CHECK_CUDA(handle.get_stream());
+    RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
   }
 }
 
 // Function for executing augmentation pass of the maximum matching.
 template <typename vertex_t, typename weight_t>
-inline void augmentationPass(raft::device_resources const& handle,
+inline void augmentationPass(raft::resources const& handle,
                              Vertices<vertex_t, weight_t>& d_vertices_dev,
                              VertexData<vertex_t>& d_row_data_dev,
                              VertexData<vertex_t>& d_col_data_dev,
@@ -397,8 +417,8 @@ inline void augmentationPass(raft::device_resources const& handle,
   dim3 threads_per_block;
   detail::calculateLinearDims(blocks_per_grid, threads_per_block, total_blocks, SP * N);
 
-  rmm::device_uvector<bool> predicates_v(SP * N, handle.get_stream());
-  rmm::device_uvector<vertex_t> addresses_v(SP * N, handle.get_stream());
+  rmm::device_uvector<bool> predicates_v(SP * N, resource::get_cuda_stream(handle));
+  rmm::device_uvector<vertex_t> addresses_v(SP * N, resource::get_cuda_stream(handle));
 
   thrust::fill_n(thrust::device, predicates_v.data(), SP * N, false);
   thrust::fill_n(thrust::device, addresses_v.data(), SP * N, vertex_t{0});
@@ -407,10 +427,10 @@ inline void augmentationPass(raft::device_resources const& handle,
   kernel_augmentPredicateConstruction<<<blocks_per_grid,
                                         threads_per_block,
                                         0,
-                                        handle.get_stream()>>>(
+                                        resource::get_cuda_stream(handle)>>>(
     predicates_v.data(), addresses_v.data(), d_row_data_dev.is_visited, SP * N);
 
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 
   // calculate total number of vertices.
   // TODO: should be vertex_t
@@ -427,28 +447,33 @@ inline void augmentationPass(raft::device_resources const& handle,
     detail::calculateLinearDims(
       blocks_per_grid_1, threads_per_block_1, total_blocks_1, row_ids_csr_size);
 
-    rmm::device_uvector<vertex_t> elements_v(row_ids_csr_size, handle.get_stream());
+    rmm::device_uvector<vertex_t> elements_v(row_ids_csr_size, resource::get_cuda_stream(handle));
 
-    kernel_augmentScatter<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+    kernel_augmentScatter<<<blocks_per_grid,
+                            threads_per_block,
+                            0,
+                            resource::get_cuda_stream(handle)>>>(
       elements_v.data(), predicates_v.data(), addresses_v.data(), vertex_t{SP * N});
 
-    RAFT_CHECK_CUDA(handle.get_stream());
+    RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 
-    kernel_augmentation<<<blocks_per_grid_1, threads_per_block_1, 0, handle.get_stream()>>>(
-      d_vertices_dev.row_assignments,
-      d_vertices_dev.col_assignments,
-      elements_v.data(),
-      d_row_data_dev,
-      d_col_data_dev,
-      vertex_t{N},
-      row_ids_csr_size);
+    kernel_augmentation<<<blocks_per_grid_1,
+                          threads_per_block_1,
+                          0,
+                          resource::get_cuda_stream(handle)>>>(d_vertices_dev.row_assignments,
+                                                               d_vertices_dev.col_assignments,
+                                                               elements_v.data(),
+                                                               d_row_data_dev,
+                                                               d_col_data_dev,
+                                                               vertex_t{N},
+                                                               row_ids_csr_size);
 
-    RAFT_CHECK_CUDA(handle.get_stream());
+    RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
   }
 }
 
 template <typename vertex_t, typename weight_t>
-inline void dualUpdate(raft::device_resources const& handle,
+inline void dualUpdate(raft::resources const& handle,
                        Vertices<vertex_t, weight_t>& d_vertices_dev,
                        VertexData<vertex_t>& d_row_data_dev,
                        VertexData<vertex_t>& d_col_data_dev,
@@ -460,10 +485,10 @@ inline void dualUpdate(raft::device_resources const& handle,
   dim3 threads_per_block;
   int total_blocks;
 
-  rmm::device_uvector<weight_t> sp_min_v(SP, handle.get_stream());
+  rmm::device_uvector<weight_t> sp_min_v(SP, resource::get_cuda_stream(handle));
 
   detail::calculateLinearDims(blocks_per_grid, threads_per_block, total_blocks, SP);
-  kernel_dualUpdate_1<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  kernel_dualUpdate_1<<<blocks_per_grid, threads_per_block, 0, resource::get_cuda_stream(handle)>>>(
     sp_min_v.data(),
     d_vertices_dev.col_slacks,
     d_vertices_dev.col_covers,
@@ -471,10 +496,10 @@ inline void dualUpdate(raft::device_resources const& handle,
     N,
     std::numeric_limits<weight_t>::max());
 
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 
   detail::calculateRectangularDims(blocks_per_grid, threads_per_block, total_blocks, N, SP);
-  kernel_dualUpdate_2<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  kernel_dualUpdate_2<<<blocks_per_grid, threads_per_block, 0, resource::get_cuda_stream(handle)>>>(
     sp_min_v.data(),
     d_vertices_dev.row_duals,
     d_vertices_dev.col_duals,
@@ -488,12 +513,12 @@ inline void dualUpdate(raft::device_resources const& handle,
     std::numeric_limits<weight_t>::max(),
     epsilon);
 
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 }
 
 // Function for calculating optimal objective function value using dual variables.
 template <typename vertex_t, typename weight_t>
-inline void calcObjValDual(raft::device_resources const& handle,
+inline void calcObjValDual(raft::resources const& handle,
                            weight_t* d_obj_val,
                            Vertices<vertex_t, weight_t>& d_vertices_dev,
                            int SP,
@@ -505,15 +530,18 @@ inline void calcObjValDual(raft::device_resources const& handle,
 
   detail::calculateLinearDims(blocks_per_grid, threads_per_block, total_blocks, SP);
 
-  kernel_calcObjValDual<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  kernel_calcObjValDual<<<blocks_per_grid,
+                          threads_per_block,
+                          0,
+                          resource::get_cuda_stream(handle)>>>(
     d_obj_val, d_vertices_dev.row_duals, d_vertices_dev.col_duals, SP, N);
 
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 }
 
 // Function for calculating optimal objective function value using dual variables.
 template <typename vertex_t, typename weight_t>
-inline void calcObjValPrimal(raft::device_resources const& handle,
+inline void calcObjValPrimal(raft::resources const& handle,
                              weight_t* d_obj_val,
                              weight_t const* d_costs,
                              vertex_t const* d_row_assignments,
@@ -526,10 +554,13 @@ inline void calcObjValPrimal(raft::device_resources const& handle,
 
   detail::calculateLinearDims(blocks_per_grid, threads_per_block, total_blocks, SP);
 
-  kernel_calcObjValPrimal<<<blocks_per_grid, threads_per_block, 0, handle.get_stream()>>>(
+  kernel_calcObjValPrimal<<<blocks_per_grid,
+                            threads_per_block,
+                            0,
+                            resource::get_cuda_stream(handle)>>>(
     d_obj_val, d_costs, d_row_assignments, SP, N);
 
-  RAFT_CHECK_CUDA(handle.get_stream());
+  RAFT_CHECK_CUDA(resource::get_cuda_stream(handle));
 }
 
 }  // namespace raft::solver::detail
diff --git a/cpp/include/raft/solver/detail/lap_kernels.cuh b/cpp/include/raft/solver/detail/lap_kernels.cuh
index 69930a1460..88def15153 100644
--- a/cpp/include/raft/solver/detail/lap_kernels.cuh
+++ b/cpp/include/raft/solver/detail/lap_kernels.cuh
@@ -26,7 +26,7 @@
 
 #include "../linear_assignment_types.hpp"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 
 #include <thrust/execution_policy.h>
diff --git a/cpp/include/raft/solver/linear_assignment.cuh b/cpp/include/raft/solver/linear_assignment.cuh
index 6e66bafe1f..a88356a42e 100644
--- a/cpp/include/raft/solver/linear_assignment.cuh
+++ b/cpp/include/raft/solver/linear_assignment.cuh
@@ -28,7 +28,8 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/execution_policy.h>
@@ -61,7 +62,7 @@ class LinearAssignmentProblem {
   Vertices<vertex_t, weight_t> d_vertices_dev;
   VertexData<vertex_t> d_row_data_dev, d_col_data_dev;
 
-  raft::device_resources const& handle_;
+  raft::resources const& handle_;
   rmm::device_uvector<int> row_covers_v;
   rmm::device_uvector<int> col_covers_v;
   rmm::device_uvector<weight_t> row_duals_v;
@@ -84,7 +85,7 @@ class LinearAssignmentProblem {
    * @param batchsize
    * @param epsilon
    */
-  LinearAssignmentProblem(raft::device_resources const& handle,
+  LinearAssignmentProblem(raft::resources const& handle,
                           vertex_t size,
                           vertex_t batchsize,
                           weight_t epsilon)
@@ -93,19 +94,19 @@ class LinearAssignmentProblem {
       batchsize_(batchsize),
       epsilon_(epsilon),
       d_costs_(nullptr),
-      row_covers_v(0, handle_.get_stream()),
-      col_covers_v(0, handle_.get_stream()),
-      row_duals_v(0, handle_.get_stream()),
-      col_duals_v(0, handle_.get_stream()),
-      col_slacks_v(0, handle_.get_stream()),
-      row_is_visited_v(0, handle_.get_stream()),
-      col_is_visited_v(0, handle_.get_stream()),
-      row_parents_v(0, handle_.get_stream()),
-      col_parents_v(0, handle_.get_stream()),
-      row_children_v(0, handle_.get_stream()),
-      col_children_v(0, handle_.get_stream()),
-      obj_val_primal_v(0, handle_.get_stream()),
-      obj_val_dual_v(0, handle_.get_stream())
+      row_covers_v(0, resource::get_cuda_stream(handle_)),
+      col_covers_v(0, resource::get_cuda_stream(handle_)),
+      row_duals_v(0, resource::get_cuda_stream(handle_)),
+      col_duals_v(0, resource::get_cuda_stream(handle_)),
+      col_slacks_v(0, resource::get_cuda_stream(handle_)),
+      row_is_visited_v(0, resource::get_cuda_stream(handle_)),
+      col_is_visited_v(0, resource::get_cuda_stream(handle_)),
+      row_parents_v(0, resource::get_cuda_stream(handle_)),
+      col_parents_v(0, resource::get_cuda_stream(handle_)),
+      row_children_v(0, resource::get_cuda_stream(handle_)),
+      col_children_v(0, resource::get_cuda_stream(handle_)),
+      obj_val_primal_v(0, resource::get_cuda_stream(handle_)),
+      obj_val_dual_v(0, resource::get_cuda_stream(handle_))
   {
   }
 
@@ -169,8 +170,9 @@ class LinearAssignmentProblem {
   weight_t getPrimalObjectiveValue(int spId)
   {
     weight_t result;
-    raft::update_host(&result, obj_val_primal_v.data() + spId, 1, handle_.get_stream());
-    RAFT_CHECK_CUDA(handle_.get_stream());
+    raft::update_host(
+      &result, obj_val_primal_v.data() + spId, 1, resource::get_cuda_stream(handle_));
+    RAFT_CHECK_CUDA(resource::get_cuda_stream(handle_));
     return result;
   }
 
@@ -182,8 +184,8 @@ class LinearAssignmentProblem {
   weight_t getDualObjectiveValue(int spId)
   {
     weight_t result;
-    raft::update_host(&result, obj_val_dual_v.data() + spId, 1, handle_.get_stream());
-    RAFT_CHECK_CUDA(handle_.get_stream());
+    raft::update_host(&result, obj_val_dual_v.data() + spId, 1, resource::get_cuda_stream(handle_));
+    RAFT_CHECK_CUDA(resource::get_cuda_stream(handle_));
     return result;
   }
 
@@ -191,7 +193,7 @@ class LinearAssignmentProblem {
   // Helper function for initializing global variables and arrays on a single host.
   void initializeDevice()
   {
-    cudaStream_t stream = handle_.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(handle_);
     row_covers_v.resize(batchsize_ * size_, stream);
     col_covers_v.resize(batchsize_ * size_, stream);
     row_duals_v.resize(batchsize_ * size_, stream);
@@ -269,10 +271,10 @@ class LinearAssignmentProblem {
   {
     int next;
 
-    rmm::device_scalar<bool> flag_v(handle_.get_stream());
+    rmm::device_scalar<bool> flag_v(resource::get_cuda_stream(handle_));
 
     bool h_flag = false;
-    flag_v.set_value_async(h_flag, handle_.get_stream());
+    flag_v.set_value_async(h_flag, resource::get_cuda_stream(handle_));
 
     detail::executeZeroCover(handle_,
                              d_costs_,
@@ -284,7 +286,7 @@ class LinearAssignmentProblem {
                              size_,
                              epsilon_);
 
-    h_flag = flag_v.value(handle_.get_stream());
+    h_flag = flag_v.value(resource::get_cuda_stream(handle_));
 
     next = h_flag ? 4 : 5;
 
diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh
index 09f4135a51..999e64cb0b 100644
--- a/cpp/include/raft/sparse/convert/csr.cuh
+++ b/cpp/include/raft/sparse/convert/csr.cuh
@@ -27,7 +27,7 @@ namespace sparse {
 namespace convert {
 
 template <typename value_t>
-void coo_to_csr(raft::device_resources const& handle,
+void coo_to_csr(raft::resources const& handle,
                 const int* srcRows,
                 const int* srcCols,
                 const value_t* srcVals,
@@ -90,7 +90,7 @@ void sorted_coo_to_csr(COO<T>* coo, int* row_ind, cudaStream_t stream)
  *                         number of non-zeros in adj.
  */
 template <typename index_t = int>
-void adj_to_csr(raft::device_resources const& handle,
+void adj_to_csr(raft::resources const& handle,
                 const bool* adj,         // Row-major adjacency matrix
                 const index_t* row_ind,  // Precomputed row indices
                 index_t num_rows,        // # rows of adj
diff --git a/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh
index 87c534d7b8..b3471d7426 100644
--- a/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh
+++ b/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh
@@ -17,8 +17,9 @@
 #pragma once
 
 #include <cooperative_groups.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <raft/util/device_atomics.cuh>
 #include <raft/util/vectorized.cuh>
@@ -129,7 +130,7 @@ __global__ void __launch_bounds__(adj_to_csr_tpb)
  *                         number of non-zeros in adj.
  */
 template <typename index_t = int>
-void adj_to_csr(raft::device_resources const& handle,
+void adj_to_csr(raft::resources const& handle,
                 const bool* adj,         // row-major adjacency matrix
                 const index_t* row_ind,  // precomputed row indices
                 index_t num_rows,        // # rows of adj
@@ -138,7 +139,7 @@ void adj_to_csr(raft::device_resources const& handle,
                 index_t* out_col_ind     // output column indices
 )
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   // Check inputs and return early if possible.
   if (num_rows == 0 || num_cols == 0) { return; }
diff --git a/cpp/include/raft/sparse/convert/detail/csr.cuh b/cpp/include/raft/sparse/convert/detail/csr.cuh
index 3f155854c0..11e745f680 100644
--- a/cpp/include/raft/sparse/convert/detail/csr.cuh
+++ b/cpp/include/raft/sparse/convert/detail/csr.cuh
@@ -17,8 +17,10 @@
 #pragma once
 
 #include <cusparse_v2.h>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusparse_handle.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -44,7 +46,7 @@ namespace convert {
 namespace detail {
 
 template <typename value_t>
-void coo_to_csr(raft::device_resources const& handle,
+void coo_to_csr(raft::resources const& handle,
                 const int* srcRows,
                 const int* srcCols,
                 const value_t* srcVals,
@@ -54,8 +56,8 @@ void coo_to_csr(raft::device_resources const& handle,
                 int* dstCols,
                 value_t* dstVals)
 {
-  auto stream         = handle.get_stream();
-  auto cusparseHandle = handle.get_cusparse_handle();
+  auto stream         = resource::get_cuda_stream(handle);
+  auto cusparseHandle = resource::get_cusparse_handle(handle);
   rmm::device_uvector<int> dstRows(nnz, stream);
   RAFT_CUDA_TRY(
     cudaMemcpyAsync(dstRows.data(), srcRows, sizeof(int) * nnz, cudaMemcpyDeviceToDevice, stream));
diff --git a/cpp/include/raft/sparse/detail/cusparse_wrappers.h b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
index fe433d4641..0740e2ab8c 100644
--- a/cpp/include/raft/sparse/detail/cusparse_wrappers.h
+++ b/cpp/include/raft/sparse/detail/cusparse_wrappers.h
@@ -777,7 +777,7 @@ cusparseStatus_t cusparsegemmi(  // NOLINT
   auto return_value =
     cusparsespmm(handle, opB, opA, alpha, matB, matA, beta, matC, alg, ext_buf, stream);
 
-  raft::device_resources rhandle;
+  raft::resources rhandle;
   raft::linalg::transpose(rhandle, CT.data(), C, n, m, stream);
   // destroy matrix/vector descriptors
   CUSPARSE_CHECK(cusparseDestroyDnMat(matA));
diff --git a/cpp/include/raft/sparse/distance/common.h b/cpp/include/raft/sparse/distance/common.h
index 1e5aeb5210..0b866bdc55 100644
--- a/cpp/include/raft/sparse/distance/common.h
+++ b/cpp/include/raft/sparse/distance/common.h
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 namespace sparse {
@@ -24,7 +24,7 @@ namespace distance {
 
 template <typename value_idx, typename value_t>
 struct distances_config_t {
-  distances_config_t(raft::device_resources const& handle_) : handle(handle_) {}
+  distances_config_t(raft::resources const& handle_) : handle(handle_) {}
 
   // left side
   value_idx a_nrows;
@@ -42,7 +42,7 @@ struct distances_config_t {
   value_idx* b_indices;
   value_t* b_data;
 
-  raft::device_resources const& handle;
+  raft::resources const& handle;
 };
 
 template <typename value_t>
diff --git a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
index cdcb0b7322..630457158b 100644
--- a/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/bin_distance.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <limits.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/distance/distance_types.hpp>
 #include <raft/sparse/detail/utils.h>
@@ -117,7 +118,7 @@ template <typename value_idx = int, typename value_t = float>
 class jaccard_expanded_distances_t : public distances_t<value_t> {
  public:
   explicit jaccard_expanded_distances_t(const distances_config_t<value_idx, value_t>& config)
-    : config_(&config), workspace(0, config.handle.get_stream()), ip_dists(config)
+    : config_(&config), workspace(0, resource::get_cuda_stream(config.handle)), ip_dists(config)
   {
   }
 
@@ -128,12 +129,13 @@ class jaccard_expanded_distances_t : public distances_t<value_t> {
     value_idx* b_indices = ip_dists.b_rows_coo();
     value_t* b_data      = ip_dists.b_data_coo();
 
-    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz, config_->handle.get_stream());
+    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz,
+                                                   resource::get_cuda_stream(config_->handle));
     raft::sparse::convert::csr_to_coo(config_->a_indptr,
                                       config_->a_nrows,
                                       search_coo_rows.data(),
                                       config_->a_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
 
     compute_bin_distance(out_dists,
                          search_coo_rows.data(),
@@ -144,7 +146,7 @@ class jaccard_expanded_distances_t : public distances_t<value_t> {
                          config_->b_nnz,
                          config_->a_nrows,
                          config_->b_nrows,
-                         config_->handle.get_stream(),
+                         resource::get_cuda_stream(config_->handle),
                          [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) {
                            value_t q_r_union = q_norm + r_norm;
                            value_t denom     = q_r_union - dot;
@@ -173,7 +175,7 @@ template <typename value_idx = int, typename value_t = float>
 class dice_expanded_distances_t : public distances_t<value_t> {
  public:
   explicit dice_expanded_distances_t(const distances_config_t<value_idx, value_t>& config)
-    : config_(&config), workspace(0, config.handle.get_stream()), ip_dists(config)
+    : config_(&config), workspace(0, resource::get_cuda_stream(config.handle)), ip_dists(config)
   {
   }
 
@@ -184,12 +186,13 @@ class dice_expanded_distances_t : public distances_t<value_t> {
     value_idx* b_indices = ip_dists.b_rows_coo();
     value_t* b_data      = ip_dists.b_data_coo();
 
-    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz, config_->handle.get_stream());
+    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz,
+                                                   resource::get_cuda_stream(config_->handle));
     raft::sparse::convert::csr_to_coo(config_->a_indptr,
                                       config_->a_nrows,
                                       search_coo_rows.data(),
                                       config_->a_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
 
     compute_bin_distance(out_dists,
                          search_coo_rows.data(),
@@ -200,7 +203,7 @@ class dice_expanded_distances_t : public distances_t<value_t> {
                          config_->b_nnz,
                          config_->a_nrows,
                          config_->b_nrows,
-                         config_->handle.get_stream(),
+                         resource::get_cuda_stream(config_->handle),
                          [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) {
                            value_t q_r_union = q_norm + r_norm;
                            value_t dice      = (2 * dot) / q_r_union;
diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
index 53ef0326fb..3a8cf53b6e 100644
--- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
+++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 
 #include "coo_spmv_strategies/dense_smem_strategy.cuh"
 #include "coo_spmv_strategies/hash_strategy.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cuda_utils.cuh>
@@ -58,7 +59,7 @@ inline void balanced_coo_pairwise_generalized_spmv(
   RAFT_CUDA_TRY(cudaMemsetAsync(out_dists,
                                 0,
                                 sizeof(value_t) * config_.a_nrows * config_.b_nrows,
-                                config_.handle.get_stream()));
+                                resource::get_cuda_stream(config_.handle)));
 
   strategy.dispatch(out_dists, coo_rows_b, product_func, accum_func, write_func, chunk_size);
 };
@@ -114,7 +115,7 @@ inline void balanced_coo_pairwise_generalized_spmv(
   RAFT_CUDA_TRY(cudaMemsetAsync(out_dists,
                                 0,
                                 sizeof(value_t) * config_.a_nrows * config_.b_nrows,
-                                config_.handle.get_stream()));
+                                resource::get_cuda_stream(config_.handle)));
 
   int max_cols = max_cols_per_block<value_idx, value_t>();
 
diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/base_strategy.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/base_strategy.cuh
index c4e39c11a0..138471c6cf 100644
--- a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/base_strategy.cuh
+++ b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/base_strategy.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include "../coo_spmv_kernel.cuh"
 #include "../utils.cuh"
 #include "coo_mask_row_iterators.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <rmm/device_uvector.hpp>
 
@@ -65,25 +66,25 @@ class coo_spmv_strategy {
                                          cudaFuncCachePreferShared));
 
     balanced_coo_generalized_spmv_kernel<strategy_t, indptr_it, value_idx, value_t, false, tpb>
-      <<<n_blocks, tpb, smem, config.handle.get_stream()>>>(strategy,
-                                                            a_indptr,
-                                                            config.a_indices,
-                                                            config.a_data,
-                                                            config.a_nnz,
-                                                            coo_rows_b,
-                                                            config.b_indices,
-                                                            config.b_data,
-                                                            config.a_nrows,
-                                                            config.b_nrows,
-                                                            smem_dim,
-                                                            config.b_nnz,
-                                                            out_dists,
-                                                            n_blocks_per_row,
-                                                            chunk_size,
-                                                            config.b_ncols,
-                                                            product_func,
-                                                            accum_func,
-                                                            write_func);
+      <<<n_blocks, tpb, smem, resource::get_cuda_stream(config.handle)>>>(strategy,
+                                                                          a_indptr,
+                                                                          config.a_indices,
+                                                                          config.a_data,
+                                                                          config.a_nnz,
+                                                                          coo_rows_b,
+                                                                          config.b_indices,
+                                                                          config.b_data,
+                                                                          config.a_nrows,
+                                                                          config.b_nrows,
+                                                                          smem_dim,
+                                                                          config.b_nnz,
+                                                                          out_dists,
+                                                                          n_blocks_per_row,
+                                                                          chunk_size,
+                                                                          config.b_ncols,
+                                                                          product_func,
+                                                                          accum_func,
+                                                                          write_func);
   }
 
   template <typename strategy_t,
@@ -115,25 +116,25 @@ class coo_spmv_strategy {
                                          cudaFuncCachePreferShared));
 
     balanced_coo_generalized_spmv_kernel<strategy_t, indptr_it, value_idx, value_t, true, tpb>
-      <<<n_blocks, tpb, smem, config.handle.get_stream()>>>(strategy,
-                                                            b_indptr,
-                                                            config.b_indices,
-                                                            config.b_data,
-                                                            config.b_nnz,
-                                                            coo_rows_a,
-                                                            config.a_indices,
-                                                            config.a_data,
-                                                            config.b_nrows,
-                                                            config.a_nrows,
-                                                            smem_dim,
-                                                            config.a_nnz,
-                                                            out_dists,
-                                                            n_blocks_per_row,
-                                                            chunk_size,
-                                                            config.a_ncols,
-                                                            product_func,
-                                                            accum_func,
-                                                            write_func);
+      <<<n_blocks, tpb, smem, resource::get_cuda_stream(config.handle)>>>(strategy,
+                                                                          b_indptr,
+                                                                          config.b_indices,
+                                                                          config.b_data,
+                                                                          config.b_nnz,
+                                                                          coo_rows_a,
+                                                                          config.a_indices,
+                                                                          config.a_data,
+                                                                          config.b_nrows,
+                                                                          config.a_nrows,
+                                                                          smem_dim,
+                                                                          config.a_nnz,
+                                                                          out_dists,
+                                                                          n_blocks_per_row,
+                                                                          chunk_size,
+                                                                          config.a_ncols,
+                                                                          product_func,
+                                                                          accum_func,
+                                                                          write_func);
   }
 
  protected:
diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/hash_strategy.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/hash_strategy.cuh
index c272d94c14..d21ae29a34 100644
--- a/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/hash_strategy.cuh
+++ b/cpp/include/raft/sparse/distance/detail/coo_spmv_strategies/hash_strategy.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 #pragma once
 
 #include "base_strategy.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <cuco/static_map.cuh>
 
@@ -62,7 +64,7 @@ class hash_strategy : public coo_spmv_strategy<value_idx, value_t, tpb> {
                        std::tuple<value_idx, value_idx>& n_rows_divided,
                        cudaStream_t stream)
   {
-    auto policy = this->config.handle.get_thrust_policy();
+    auto policy = resource::get_thrust_policy(this->config.handle);
 
     auto less                   = thrust::copy_if(policy,
                                 thrust::make_counting_iterator(value_idx(0)),
@@ -91,14 +93,14 @@ class hash_strategy : public coo_spmv_strategy<value_idx, value_t, tpb> {
   {
     auto n_blocks_per_row = raft::ceildiv(this->config.b_nnz, chunk_size * tpb);
     rmm::device_uvector<value_idx> mask_indptr(this->config.a_nrows,
-                                               this->config.handle.get_stream());
+                                               resource::get_cuda_stream(this->config.handle));
     std::tuple<value_idx, value_idx> n_rows_divided;
 
     chunking_needed(this->config.a_indptr,
                     this->config.a_nrows,
                     mask_indptr,
                     n_rows_divided,
-                    this->config.handle.get_stream());
+                    resource::get_cuda_stream(this->config.handle));
 
     auto less_rows = std::get<0>(n_rows_divided);
     if (less_rows > 0) {
@@ -120,16 +122,17 @@ class hash_strategy : public coo_spmv_strategy<value_idx, value_t, tpb> {
 
     auto more_rows = std::get<1>(n_rows_divided);
     if (more_rows > 0) {
-      rmm::device_uvector<value_idx> n_chunks_per_row(more_rows + 1,
-                                                      this->config.handle.get_stream());
-      rmm::device_uvector<value_idx> chunk_indices(0, this->config.handle.get_stream());
+      rmm::device_uvector<value_idx> n_chunks_per_row(
+        more_rows + 1, resource::get_cuda_stream(this->config.handle));
+      rmm::device_uvector<value_idx> chunk_indices(0,
+                                                   resource::get_cuda_stream(this->config.handle));
       chunked_mask_row_it<value_idx>::init(this->config.a_indptr,
                                            mask_indptr.data() + less_rows,
                                            more_rows,
                                            capacity_threshold * map_size,
                                            n_chunks_per_row,
                                            chunk_indices,
-                                           this->config.handle.get_stream());
+                                           resource::get_cuda_stream(this->config.handle));
 
       chunked_mask_row_it<value_idx> more(this->config.a_indptr,
                                           more_rows,
@@ -137,7 +140,7 @@ class hash_strategy : public coo_spmv_strategy<value_idx, value_t, tpb> {
                                           capacity_threshold * map_size,
                                           n_chunks_per_row.data(),
                                           chunk_indices.data(),
-                                          this->config.handle.get_stream());
+                                          resource::get_cuda_stream(this->config.handle));
 
       auto n_more_blocks = more.total_row_blocks * n_blocks_per_row;
       this->_dispatch_base(*this,
@@ -164,14 +167,14 @@ class hash_strategy : public coo_spmv_strategy<value_idx, value_t, tpb> {
   {
     auto n_blocks_per_row = raft::ceildiv(this->config.a_nnz, chunk_size * tpb);
     rmm::device_uvector<value_idx> mask_indptr(this->config.b_nrows,
-                                               this->config.handle.get_stream());
+                                               resource::get_cuda_stream(this->config.handle));
     std::tuple<value_idx, value_idx> n_rows_divided;
 
     chunking_needed(this->config.b_indptr,
                     this->config.b_nrows,
                     mask_indptr,
                     n_rows_divided,
-                    this->config.handle.get_stream());
+                    resource::get_cuda_stream(this->config.handle));
 
     auto less_rows = std::get<0>(n_rows_divided);
     if (less_rows > 0) {
@@ -193,16 +196,17 @@ class hash_strategy : public coo_spmv_strategy<value_idx, value_t, tpb> {
 
     auto more_rows = std::get<1>(n_rows_divided);
     if (more_rows > 0) {
-      rmm::device_uvector<value_idx> n_chunks_per_row(more_rows + 1,
-                                                      this->config.handle.get_stream());
-      rmm::device_uvector<value_idx> chunk_indices(0, this->config.handle.get_stream());
+      rmm::device_uvector<value_idx> n_chunks_per_row(
+        more_rows + 1, resource::get_cuda_stream(this->config.handle));
+      rmm::device_uvector<value_idx> chunk_indices(0,
+                                                   resource::get_cuda_stream(this->config.handle));
       chunked_mask_row_it<value_idx>::init(this->config.b_indptr,
                                            mask_indptr.data() + less_rows,
                                            more_rows,
                                            capacity_threshold * map_size,
                                            n_chunks_per_row,
                                            chunk_indices,
-                                           this->config.handle.get_stream());
+                                           resource::get_cuda_stream(this->config.handle));
 
       chunked_mask_row_it<value_idx> more(this->config.b_indptr,
                                           more_rows,
@@ -210,7 +214,7 @@ class hash_strategy : public coo_spmv_strategy<value_idx, value_t, tpb> {
                                           capacity_threshold * map_size,
                                           n_chunks_per_row.data(),
                                           chunk_indices.data(),
-                                          this->config.handle.get_stream());
+                                          resource::get_cuda_stream(this->config.handle));
 
       auto n_more_blocks = more.total_row_blocks * n_blocks_per_row;
       this->_dispatch_base_rev(*this,
diff --git a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
index d45e643780..ef5bae8aa0 100644
--- a/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/ip_distance.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <limits.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cuda_utils.cuh>
@@ -46,13 +47,13 @@ class ip_distances_t : public distances_t<value_t> {
    * @param[in] config specifies inputs, outputs, and sizes
    */
   ip_distances_t(const distances_config_t<value_idx, value_t>& config)
-    : config_(&config), coo_rows_b(config.b_nnz, config.handle.get_stream())
+    : config_(&config), coo_rows_b(config.b_nnz, resource::get_cuda_stream(config.handle))
   {
     raft::sparse::convert::csr_to_coo(config_->b_indptr,
                                       config_->b_nrows,
                                       coo_rows_b.data(),
                                       config_->b_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
   }
 
   /**
diff --git a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh
index 2f165b3ff2..5293b36a26 100644
--- a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/spatial/knn/knn.cuh>
 
 #include <raft/distance/distance_types.hpp>
@@ -241,12 +242,13 @@ class l2_expanded_distances_t : public distances_t<value_t> {
     value_idx* b_indices = ip_dists.b_rows_coo();
     value_t* b_data      = ip_dists.b_data_coo();
 
-    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz, config_->handle.get_stream());
+    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz,
+                                                   resource::get_cuda_stream(config_->handle));
     raft::sparse::convert::csr_to_coo(config_->a_indptr,
                                       config_->a_nrows,
                                       search_coo_rows.data(),
                                       config_->a_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
 
     compute_l2(out_dists,
                search_coo_rows.data(),
@@ -257,7 +259,7 @@ class l2_expanded_distances_t : public distances_t<value_t> {
                config_->b_nnz,
                config_->a_nrows,
                config_->b_nrows,
-               config_->handle.get_stream(),
+               resource::get_cuda_stream(config_->handle),
                [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) {
                  return -2 * dot + q_norm + r_norm;
                });
@@ -294,7 +296,7 @@ class l2_sqrt_expanded_distances_t : public l2_expanded_distances_t<value_idx, v
         int neg = input < 0 ? -1 : 1;
         return raft::sqrt(abs(input) * neg);
       },
-      this->config_->handle.get_stream());
+      resource::get_cuda_stream(this->config_->handle));
   }
 
   ~l2_sqrt_expanded_distances_t() = default;
@@ -315,12 +317,13 @@ class correlation_expanded_distances_t : public distances_t<value_t> {
     value_idx* b_indices = ip_dists.b_rows_coo();
     value_t* b_data      = ip_dists.b_data_coo();
 
-    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz, config_->handle.get_stream());
+    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz,
+                                                   resource::get_cuda_stream(config_->handle));
     raft::sparse::convert::csr_to_coo(config_->a_indptr,
                                       config_->a_nrows,
                                       search_coo_rows.data(),
                                       config_->a_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
 
     compute_corr(out_dists,
                  search_coo_rows.data(),
@@ -332,7 +335,7 @@ class correlation_expanded_distances_t : public distances_t<value_t> {
                  config_->a_nrows,
                  config_->b_nrows,
                  config_->b_ncols,
-                 config_->handle.get_stream());
+                 resource::get_cuda_stream(config_->handle));
   }
 
   ~correlation_expanded_distances_t() = default;
@@ -350,7 +353,7 @@ template <typename value_idx = int, typename value_t = float>
 class cosine_expanded_distances_t : public distances_t<value_t> {
  public:
   explicit cosine_expanded_distances_t(const distances_config_t<value_idx, value_t>& config)
-    : config_(&config), workspace(0, config.handle.get_stream()), ip_dists(config)
+    : config_(&config), workspace(0, resource::get_cuda_stream(config.handle)), ip_dists(config)
   {
   }
 
@@ -361,12 +364,13 @@ class cosine_expanded_distances_t : public distances_t<value_t> {
     value_idx* b_indices = ip_dists.b_rows_coo();
     value_t* b_data      = ip_dists.b_data_coo();
 
-    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz, config_->handle.get_stream());
+    rmm::device_uvector<value_idx> search_coo_rows(config_->a_nnz,
+                                                   resource::get_cuda_stream(config_->handle));
     raft::sparse::convert::csr_to_coo(config_->a_indptr,
                                       config_->a_nrows,
                                       search_coo_rows.data(),
                                       config_->a_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
 
     compute_l2(out_dists,
                search_coo_rows.data(),
@@ -377,7 +381,7 @@ class cosine_expanded_distances_t : public distances_t<value_t> {
                config_->b_nnz,
                config_->a_nrows,
                config_->b_nrows,
-               config_->handle.get_stream(),
+               resource::get_cuda_stream(config_->handle),
                [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) {
                  value_t norms = raft::sqrt(q_norm) * raft::sqrt(r_norm);
                  // deal with potential for 0 in denominator by forcing 0/1 instead
@@ -410,20 +414,20 @@ template <typename value_idx = int, typename value_t = float>
 class hellinger_expanded_distances_t : public distances_t<value_t> {
  public:
   explicit hellinger_expanded_distances_t(const distances_config_t<value_idx, value_t>& config)
-    : config_(&config), workspace(0, config.handle.get_stream())
+    : config_(&config), workspace(0, resource::get_cuda_stream(config.handle))
   {
   }
 
   void compute(value_t* out_dists)
   {
     rmm::device_uvector<value_idx> coo_rows(std::max(config_->b_nnz, config_->a_nnz),
-                                            config_->handle.get_stream());
+                                            resource::get_cuda_stream(config_->handle));
 
     raft::sparse::convert::csr_to_coo(config_->b_indptr,
                                       config_->b_nrows,
                                       coo_rows.data(),
                                       config_->b_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
 
     balanced_coo_pairwise_generalized_spmv<value_idx, value_t>(
       out_dists,
@@ -442,7 +446,7 @@ class hellinger_expanded_distances_t : public distances_t<value_t> {
         bool rectifier = (1 - input) > 0;
         return raft::sqrt(rectifier * (1 - input));
       },
-      config_->handle.get_stream());
+      resource::get_cuda_stream(config_->handle));
   }
 
   ~hellinger_expanded_distances_t() = default;
@@ -456,7 +460,7 @@ template <typename value_idx = int, typename value_t = float>
 class russelrao_expanded_distances_t : public distances_t<value_t> {
  public:
   explicit russelrao_expanded_distances_t(const distances_config_t<value_idx, value_t>& config)
-    : config_(&config), workspace(0, config.handle.get_stream()), ip_dists(config)
+    : config_(&config), workspace(0, resource::get_cuda_stream(config.handle)), ip_dists(config)
   {
   }
 
@@ -471,9 +475,9 @@ class russelrao_expanded_distances_t : public distances_t<value_t> {
       out_dists,
       config_->a_nrows * config_->b_nrows,
       [=] __device__(value_t input) { return (n_cols - input) * n_cols_inv; },
-      config_->handle.get_stream());
+      resource::get_cuda_stream(config_->handle));
 
-    auto exec_policy  = rmm::exec_policy(config_->handle.get_stream());
+    auto exec_policy  = rmm::exec_policy(resource::get_cuda_stream(config_->handle));
     auto diags        = thrust::counting_iterator<value_idx>(0);
     value_idx b_nrows = config_->b_nrows;
     thrust::for_each(exec_policy, diags, diags + config_->a_nrows, [=] __device__(value_idx input) {
diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
index f67109afbc..ac78068247 100644
--- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
+++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <limits.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/operators.cuh>
 #include <raft/core/operators.hpp>
@@ -52,13 +53,13 @@ void unexpanded_lp_distances(value_t* out_dists,
                              write_f write_func)
 {
   rmm::device_uvector<value_idx> coo_rows(std::max(config_->b_nnz, config_->a_nnz),
-                                          config_->handle.get_stream());
+                                          resource::get_cuda_stream(config_->handle));
 
   raft::sparse::convert::csr_to_coo(config_->b_indptr,
                                     config_->b_nrows,
                                     coo_rows.data(),
                                     config_->b_nnz,
-                                    config_->handle.get_stream());
+                                    resource::get_cuda_stream(config_->handle));
 
   balanced_coo_pairwise_generalized_spmv<value_idx, value_t>(
     out_dists, *config_, coo_rows.data(), product_func, accum_func, write_func);
@@ -67,7 +68,7 @@ void unexpanded_lp_distances(value_t* out_dists,
                                     config_->a_nrows,
                                     coo_rows.data(),
                                     config_->a_nnz,
-                                    config_->handle.get_stream());
+                                    resource::get_cuda_stream(config_->handle));
 
   balanced_coo_pairwise_generalized_spmv_rev<value_idx, value_t>(
     out_dists, *config_, coo_rows.data(), product_func, accum_func, write_func);
@@ -134,7 +135,7 @@ class l2_sqrt_unexpanded_distances_t : public l2_unexpanded_distances_t<value_id
         int neg = input < 0 ? -1 : 1;
         return raft::sqrt(abs(input) * neg);
       },
-      this->config_->handle.get_stream());
+      resource::get_cuda_stream(this->config_->handle));
   }
 };
 
@@ -207,7 +208,7 @@ class lp_unexpanded_distances_t : public distances_t<value_t> {
                                    out_dists,
                                    config_->a_nrows * config_->b_nrows,
                                    raft::pow_const_op<value_t>(one_over_p),
-                                   config_->handle.get_stream());
+                                   resource::get_cuda_stream(config_->handle));
   }
 
  private:
@@ -233,7 +234,7 @@ class hamming_unexpanded_distances_t : public distances_t<value_t> {
                                    out_dists,
                                    config_->a_nrows * config_->b_nrows,
                                    raft::mul_const_op<value_t>(n_cols),
-                                   config_->handle.get_stream());
+                                   resource::get_cuda_stream(config_->handle));
   }
 
  private:
@@ -275,7 +276,7 @@ class jensen_shannon_unexpanded_distances_t : public distances_t<value_t> {
       out_dists,
       config_->a_nrows * config_->b_nrows,
       [=] __device__(value_t input) { return raft::sqrt(0.5 * input); },
-      config_->handle.get_stream());
+      resource::get_cuda_stream(config_->handle));
   }
 
  private:
@@ -294,13 +295,13 @@ class kl_divergence_unexpanded_distances_t : public distances_t<value_t> {
   void compute(value_t* out_dists)
   {
     rmm::device_uvector<value_idx> coo_rows(std::max(config_->b_nnz, config_->a_nnz),
-                                            config_->handle.get_stream());
+                                            resource::get_cuda_stream(config_->handle));
 
     raft::sparse::convert::csr_to_coo(config_->b_indptr,
                                       config_->b_nrows,
                                       coo_rows.data(),
                                       config_->b_nnz,
-                                      config_->handle.get_stream());
+                                      resource::get_cuda_stream(config_->handle));
 
     balanced_coo_pairwise_generalized_spmv<value_idx, value_t>(
       out_dists,
@@ -314,7 +315,7 @@ class kl_divergence_unexpanded_distances_t : public distances_t<value_t> {
                                    out_dists,
                                    config_->a_nrows * config_->b_nrows,
                                    raft::mul_const_op<value_t>(0.5),
-                                   config_->handle.get_stream());
+                                   resource::get_cuda_stream(config_->handle));
   }
 
  private:
diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
index c64acbfca6..545f218e63 100644
--- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cudart_utils.hpp>
 
 #include <raft/spectral/cluster_solvers.cuh>
@@ -31,7 +32,7 @@ namespace spectral {
 namespace detail {
 
 template <typename T>
-void fit_embedding(raft::device_resources const& handle,
+void fit_embedding(raft::resources const& handle,
                    int* rows,
                    int* cols,
                    T* vals,
@@ -41,7 +42,7 @@ void fit_embedding(raft::device_resources const& handle,
                    T* out,
                    unsigned long long seed = 1234567)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   rmm::device_uvector<int> src_offsets(n + 1, stream);
   rmm::device_uvector<int> dst_cols(nnz, stream);
   rmm::device_uvector<T> dst_vals(nnz, stream);
@@ -52,7 +53,7 @@ void fit_embedding(raft::device_resources const& handle,
   rmm::device_uvector<T> eigVecs(n * (n_components + 1), stream);
   rmm::device_uvector<int> labels(n, stream);
 
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   /**
    * Raft spectral clustering
@@ -88,7 +89,7 @@ void fit_embedding(raft::device_resources const& handle,
     using size_type_t  = index_type;
     using value_type_t = value_type;
 
-    std::pair<value_type_t, index_type_t> solve(raft::device_resources const& handle,
+    std::pair<value_type_t, index_type_t> solve(raft::resources const& handle,
                                                 size_type_t n_obs_vecs,
                                                 size_type_t dim,
                                                 value_type_t const* __restrict__ obs,
diff --git a/cpp/include/raft/sparse/linalg/detail/spmm.hpp b/cpp/include/raft/sparse/linalg/detail/spmm.hpp
index b61b561a12..4ad8623076 100644
--- a/cpp/include/raft/sparse/linalg/detail/spmm.hpp
+++ b/cpp/include/raft/sparse/linalg/detail/spmm.hpp
@@ -17,8 +17,10 @@
 
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusparse_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 
 namespace raft {
@@ -117,7 +119,7 @@ cusparseSpMatDescr_t create_descriptor(
  * @param[out] descr_z output dense descriptor
  */
 template <typename ValueType>
-void spmm(raft::device_resources const& handle,
+void spmm(raft::resources const& handle,
           const bool trans_x,
           const bool trans_y,
           const bool is_row_major,
@@ -131,23 +133,24 @@ void spmm(raft::device_resources const& handle,
   auto opY = trans_y ? CUSPARSE_OPERATION_TRANSPOSE : CUSPARSE_OPERATION_NON_TRANSPOSE;
   auto alg = is_row_major ? CUSPARSE_SPMM_CSR_ALG2 : CUSPARSE_SPMM_CSR_ALG1;
   size_t bufferSize;
-  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm_bufferSize(handle.get_cusparse_handle(),
-                                                                  opX,
-                                                                  opY,
-                                                                  alpha,
-                                                                  descr_x,
-                                                                  descr_y,
-                                                                  beta,
-                                                                  descr_z,
-                                                                  alg,
-                                                                  &bufferSize,
-                                                                  handle.get_stream()));
+  RAFT_CUSPARSE_TRY(
+    raft::sparse::detail::cusparsespmm_bufferSize(resource::get_cusparse_handle(handle),
+                                                  opX,
+                                                  opY,
+                                                  alpha,
+                                                  descr_x,
+                                                  descr_y,
+                                                  beta,
+                                                  descr_z,
+                                                  alg,
+                                                  &bufferSize,
+                                                  resource::get_cuda_stream(handle)));
 
-  raft::interruptible::synchronize(handle.get_stream());
+  raft::interruptible::synchronize(resource::get_cuda_stream(handle));
 
-  rmm::device_uvector<ValueType> tmp(bufferSize, handle.get_stream());
+  rmm::device_uvector<ValueType> tmp(bufferSize, resource::get_cuda_stream(handle));
 
-  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(handle.get_cusparse_handle(),
+  RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm(resource::get_cusparse_handle(handle),
                                                        opX,
                                                        opY,
                                                        alpha,
@@ -157,7 +160,7 @@ void spmm(raft::device_resources const& handle,
                                                        descr_z,
                                                        alg,
                                                        tmp.data(),
-                                                       handle.get_stream()));
+                                                       resource::get_cuda_stream(handle)));
 }
 
 }  // end namespace detail
diff --git a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
index 4ecd447cc4..2bf7483c4e 100644
--- a/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cusparse_v2.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cuda_utils.cuh>
@@ -325,7 +326,7 @@ void from_knn_symmetrize_matrix(const value_idx* __restrict__ knn_indices,
  * Symmetrizes a COO matrix
  */
 template <typename value_idx, typename value_t>
-void symmetrize(raft::device_resources const& handle,
+void symmetrize(raft::resources const& handle,
                 const value_idx* rows,
                 const value_idx* cols,
                 const value_t* vals,
@@ -334,7 +335,7 @@ void symmetrize(raft::device_resources const& handle,
                 size_t nnz,
                 raft::sparse::COO<value_t, value_idx>& out)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   // copy rows to cols and cols to rows
   rmm::device_uvector<value_idx> symm_rows(nnz * 2, stream);
diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh
index 98e23afcdf..f7ebf50db0 100644
--- a/cpp/include/raft/sparse/linalg/norm.cuh
+++ b/cpp/include/raft/sparse/linalg/norm.cuh
@@ -18,6 +18,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/norm_types.hpp>
 #include <raft/sparse/linalg/detail/norm.cuh>
 
@@ -87,7 +88,7 @@ void csr_row_normalize_max(const int* ia,  // csr row ind array (sorted by row)
  * @param fin_op the final lambda op
  */
 template <typename Type, typename IdxType = int, typename Lambda = raft::identity_op>
-void rowNormCsr(raft::device_resources const& handle,
+void rowNormCsr(raft::resources const& handle,
                 const IdxType* ia,
                 const Type* data,
                 const IdxType nnz,
@@ -96,7 +97,7 @@ void rowNormCsr(raft::device_resources const& handle,
                 raft::linalg::NormType type,
                 Lambda fin_op = raft::identity_op())
 {
-  detail::rowNormCsrCaller(ia, data, nnz, N, norm, type, fin_op, handle.get_stream());
+  detail::rowNormCsrCaller(ia, data, nnz, N, norm, type, fin_op, resource::get_cuda_stream(handle));
 }
 
 };  // end NAMESPACE linalg
diff --git a/cpp/include/raft/sparse/linalg/spectral.cuh b/cpp/include/raft/sparse/linalg/spectral.cuh
index 35d85e893f..4c0595bf91 100644
--- a/cpp/include/raft/sparse/linalg/spectral.cuh
+++ b/cpp/include/raft/sparse/linalg/spectral.cuh
@@ -16,7 +16,7 @@
 #ifndef __SPARSE_SPECTRAL_H
 #define __SPARSE_SPECTRAL_H
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/linalg/detail/spectral.cuh>
 
 namespace raft {
@@ -24,7 +24,7 @@ namespace sparse {
 namespace spectral {
 
 template <typename T>
-void fit_embedding(raft::device_resources const& handle,
+void fit_embedding(raft::resources const& handle,
                    int* rows,
                    int* cols,
                    T* vals,
diff --git a/cpp/include/raft/sparse/linalg/spmm.cuh b/cpp/include/raft/sparse/linalg/spmm.cuh
index 73170cfc70..064da4d8fb 100644
--- a/cpp/include/raft/sparse/linalg/spmm.cuh
+++ b/cpp/include/raft/sparse/linalg/spmm.cuh
@@ -48,7 +48,7 @@ template <typename ValueType,
           typename NZType,
           typename LayoutPolicyY,
           typename LayoutPolicyZ>
-void spmm(raft::device_resources const& handle,
+void spmm(raft::resources const& handle,
           const bool trans_x,
           const bool trans_y,
           const ValueType* alpha,
diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh
index f34ba4dbd0..c36fe776d8 100644
--- a/cpp/include/raft/sparse/linalg/symmetrize.cuh
+++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh
@@ -149,7 +149,7 @@ void from_knn_symmetrize_matrix(const value_idx* __restrict__ knn_indices,
  * Symmetrizes a COO matrix
  */
 template <typename value_idx, typename value_t>
-void symmetrize(raft::device_resources const& handle,
+void symmetrize(raft::resources const& handle,
                 const value_idx* rows,
                 const value_idx* cols,
                 const value_t* vals,
diff --git a/cpp/include/raft/sparse/linalg/transpose.cuh b/cpp/include/raft/sparse/linalg/transpose.cuh
index dd5a56bed1..4333060ad9 100644
--- a/cpp/include/raft/sparse/linalg/transpose.cuh
+++ b/cpp/include/raft/sparse/linalg/transpose.cuh
@@ -16,7 +16,8 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cusparse_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/linalg/detail/transpose.h>
 
 namespace raft {
@@ -40,7 +41,7 @@ namespace linalg {
  * @param[in] stream : Cuda stream for ordering events
  */
 template <typename value_idx, typename value_t>
-void csr_transpose(raft::device_resources const& handle,
+void csr_transpose(raft::resources const& handle,
                    const value_idx* csr_indptr,
                    const value_idx* csr_indices,
                    const value_t* csr_data,
@@ -52,7 +53,7 @@ void csr_transpose(raft::device_resources const& handle,
                    value_idx nnz,
                    cudaStream_t stream)
 {
-  detail::csr_transpose(handle.get_cusparse_handle(),
+  detail::csr_transpose(resource::get_cusparse_handle(handle),
                         csr_indptr,
                         csr_indices,
                         csr_data,
diff --git a/cpp/include/raft/sparse/neighbors/brute_force.cuh b/cpp/include/raft/sparse/neighbors/brute_force.cuh
index 515213d250..47e00a012f 100644
--- a/cpp/include/raft/sparse/neighbors/brute_force.cuh
+++ b/cpp/include/raft/sparse/neighbors/brute_force.cuh
@@ -15,7 +15,8 @@
  */
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/sparse/neighbors/detail/knn.cuh>
 
@@ -39,7 +40,7 @@ namespace raft::sparse::neighbors::brute_force {
  * @param[out] output_indices dense matrix for output indices (size n_query_rows * k)
  * @param[out] output_dists dense matrix for output distances (size n_query_rows * k)
  * @param[in] k the number of neighbors to query
- * @param[in] handle CUDA handle.get_stream() to order operations with respect to
+ * @param[in] handle raft resources handle whose CUDA stream is used to order operations
  * @param[in] batch_size_index maximum number of rows to use from index matrix per batch
  * @param[in] batch_size_query maximum number of rows to use from query matrix per batch
  * @param[in] metric distance metric/measure to use
@@ -61,7 +62,7 @@ void knn(const value_idx* idxIndptr,
          value_idx* output_indices,
          value_t* output_dists,
          int k,
-         raft::device_resources const& handle,
+         raft::resources const& handle,
          size_t batch_size_index             = 2 << 14,  // approx 1M
          size_t batch_size_query             = 2 << 14,
          raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded,
diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh
index 90343c1215..fcc6ba349b 100644
--- a/cpp/include/raft/sparse/neighbors/connect_components.cuh
+++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh
@@ -16,7 +16,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/neighbors/detail/connect_components.cuh>
@@ -64,7 +64,7 @@ value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream
  */
 template <typename value_idx, typename value_t, typename red_op>
 void connect_components(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::sparse::COO<value_t, value_idx>& out,
   const value_t* X,
   const value_idx* orig_colors,
diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh
index 583ff4dfdc..adcb566cea 100644
--- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh
+++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh
@@ -16,6 +16,8 @@
 #pragma once
 
 #include <cub/cub.cuh>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
@@ -320,7 +322,7 @@ void min_components_by_color(raft::sparse::COO<value_t, value_idx>& coo,
  */
 template <typename value_idx, typename value_t, typename red_op>
 void connect_components(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::sparse::COO<value_t, value_idx>& out,
   const value_t* X,
   const value_idx* orig_colors,
@@ -329,7 +331,7 @@ void connect_components(
   red_op reduction_op,
   raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   RAFT_EXPECTS(metric == raft::distance::DistanceType::L2SqrtExpanded,
                "Fixing connectivities for an unconnected k-NN graph only "
@@ -376,7 +378,7 @@ void connect_components(
   raft::sparse::op::compute_duplicates_mask(
     out_index.data(), colors.data(), nn_colors.data(), n_rows, stream);
 
-  thrust::exclusive_scan(handle.get_thrust_policy(),
+  thrust::exclusive_scan(resource::get_thrust_policy(handle),
                          out_index.data(),
                          out_index.data() + out_index.size(),
                          out_index.data());
@@ -384,7 +386,7 @@ void connect_components(
   // compute final size
   value_idx size = 0;
   raft::update_host(&size, out_index.data() + (out_index.size() - 1), 1, stream);
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   size++;
 
diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
index 527fc14208..7d7bcba443 100644
--- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh
+++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <rmm/device_uvector.hpp>
 
 #include <raft/distance/distance_types.hpp>
@@ -128,7 +129,7 @@ class sparse_knn_t {
                value_idx* output_indices_,
                value_t* output_dists_,
                int k_,
-               raft::device_resources const& handle_,
+               raft::resources const& handle_,
                size_t batch_size_index_             = 2 << 14,  // approx 1M
                size_t batch_size_query_             = 2 << 14,
                raft::distance::DistanceType metric_ = raft::distance::DistanceType::L2Expanded,
@@ -177,21 +178,23 @@ class sparse_knn_t {
        */
 
       rmm::device_uvector<value_idx> query_batch_indptr(query_batcher.batch_rows() + 1,
-                                                        handle.get_stream());
+                                                        resource::get_cuda_stream(handle));
 
-      value_idx n_query_batch_nnz =
-        query_batcher.get_batch_csr_indptr_nnz(query_batch_indptr.data(), handle.get_stream());
+      value_idx n_query_batch_nnz = query_batcher.get_batch_csr_indptr_nnz(
+        query_batch_indptr.data(), resource::get_cuda_stream(handle));
 
-      rmm::device_uvector<value_idx> query_batch_indices(n_query_batch_nnz, handle.get_stream());
-      rmm::device_uvector<value_t> query_batch_data(n_query_batch_nnz, handle.get_stream());
+      rmm::device_uvector<value_idx> query_batch_indices(n_query_batch_nnz,
+                                                         resource::get_cuda_stream(handle));
+      rmm::device_uvector<value_t> query_batch_data(n_query_batch_nnz,
+                                                    resource::get_cuda_stream(handle));
 
       query_batcher.get_batch_csr_indices_data(
-        query_batch_indices.data(), query_batch_data.data(), handle.get_stream());
+        query_batch_indices.data(), query_batch_data.data(), resource::get_cuda_stream(handle));
 
       // A 3-partition temporary merge space to scale the batching. 2 parts for subsequent
       // batches and 1 space for the results of the merge, which get copied back to the top
-      rmm::device_uvector<value_idx> merge_buffer_indices(0, handle.get_stream());
-      rmm::device_uvector<value_t> merge_buffer_dists(0, handle.get_stream());
+      rmm::device_uvector<value_idx> merge_buffer_indices(0, resource::get_cuda_stream(handle));
+      rmm::device_uvector<value_t> merge_buffer_dists(0, resource::get_cuda_stream(handle));
 
       value_t* dists_merge_buffer_ptr;
       value_idx* indices_merge_buffer_ptr;
@@ -203,31 +206,33 @@ class sparse_knn_t {
       for (int j = 0; j < n_batches_idx; j++) {
         idx_batcher.set_batch(j);
 
-        merge_buffer_indices.resize(query_batcher.batch_rows() * k * 3, handle.get_stream());
-        merge_buffer_dists.resize(query_batcher.batch_rows() * k * 3, handle.get_stream());
+        merge_buffer_indices.resize(query_batcher.batch_rows() * k * 3,
+                                    resource::get_cuda_stream(handle));
+        merge_buffer_dists.resize(query_batcher.batch_rows() * k * 3,
+                                  resource::get_cuda_stream(handle));
 
         /**
          * Slice CSR to rows in batch
          */
         rmm::device_uvector<value_idx> idx_batch_indptr(idx_batcher.batch_rows() + 1,
-                                                        handle.get_stream());
-        rmm::device_uvector<value_idx> idx_batch_indices(0, handle.get_stream());
-        rmm::device_uvector<value_t> idx_batch_data(0, handle.get_stream());
+                                                        resource::get_cuda_stream(handle));
+        rmm::device_uvector<value_idx> idx_batch_indices(0, resource::get_cuda_stream(handle));
+        rmm::device_uvector<value_t> idx_batch_data(0, resource::get_cuda_stream(handle));
 
-        value_idx idx_batch_nnz =
-          idx_batcher.get_batch_csr_indptr_nnz(idx_batch_indptr.data(), handle.get_stream());
+        value_idx idx_batch_nnz = idx_batcher.get_batch_csr_indptr_nnz(
+          idx_batch_indptr.data(), resource::get_cuda_stream(handle));
 
-        idx_batch_indices.resize(idx_batch_nnz, handle.get_stream());
-        idx_batch_data.resize(idx_batch_nnz, handle.get_stream());
+        idx_batch_indices.resize(idx_batch_nnz, resource::get_cuda_stream(handle));
+        idx_batch_data.resize(idx_batch_nnz, resource::get_cuda_stream(handle));
 
         idx_batcher.get_batch_csr_indices_data(
-          idx_batch_indices.data(), idx_batch_data.data(), handle.get_stream());
+          idx_batch_indices.data(), idx_batch_data.data(), resource::get_cuda_stream(handle));
 
         /**
          * Compute distances
          */
         size_t dense_size = idx_batcher.batch_rows() * query_batcher.batch_rows();
-        rmm::device_uvector<value_t> batch_dists(dense_size, handle.get_stream());
+        rmm::device_uvector<value_t> batch_dists(dense_size, resource::get_cuda_stream(handle));
 
         RAFT_CUDA_TRY(cudaMemset(batch_dists.data(), 0, batch_dists.size() * sizeof(value_t)));
 
@@ -244,12 +249,13 @@ class sparse_knn_t {
                           batch_dists.data());
 
         // Build batch indices array
-        rmm::device_uvector<value_idx> batch_indices(batch_dists.size(), handle.get_stream());
+        rmm::device_uvector<value_idx> batch_indices(batch_dists.size(),
+                                                     resource::get_cuda_stream(handle));
 
         // populate batch indices array
         value_idx batch_rows = query_batcher.batch_rows(), batch_cols = idx_batcher.batch_rows();
 
-        iota_fill(batch_indices.data(), batch_rows, batch_cols, handle.get_stream());
+        iota_fill(batch_indices.data(), batch_rows, batch_cols, resource::get_cuda_stream(handle));
 
         /**
          * Perform k-selection on batch & merge with other k-selections
@@ -286,22 +292,22 @@ class sparse_knn_t {
         raft::copy_async<value_idx>(merge_buffer_indices.data(),
                                     indices_merge_buffer_tmp_ptr,
                                     batch_rows * k,
-                                    handle.get_stream());
+                                    resource::get_cuda_stream(handle));
         raft::copy_async<value_t>(merge_buffer_dists.data(),
                                   dists_merge_buffer_tmp_ptr,
                                   batch_rows * k,
-                                  handle.get_stream());
+                                  resource::get_cuda_stream(handle));
       }
 
       // Copy final merged batch to output array
       raft::copy_async<value_idx>(output_indices + (rows_processed * k),
                                   merge_buffer_indices.data(),
                                   query_batcher.batch_rows() * k,
-                                  handle.get_stream());
+                                  resource::get_cuda_stream(handle));
       raft::copy_async<value_t>(output_dists + (rows_processed * k),
                                 merge_buffer_dists.data(),
                                 query_batcher.batch_rows() * k,
-                                handle.get_stream());
+                                resource::get_cuda_stream(handle));
 
       rows_processed += query_batcher.batch_rows();
     }
@@ -320,8 +326,9 @@ class sparse_knn_t {
     id_ranges.push_back(0);
     id_ranges.push_back(idx_batcher.batch_start());
 
-    rmm::device_uvector<value_idx> trans(id_ranges.size(), handle.get_stream());
-    raft::update_device(trans.data(), id_ranges.data(), id_ranges.size(), handle.get_stream());
+    rmm::device_uvector<value_idx> trans(id_ranges.size(), resource::get_cuda_stream(handle));
+    raft::update_device(
+      trans.data(), id_ranges.data(), id_ranges.size(), resource::get_cuda_stream(handle));
 
     // combine merge buffers only if there's more than 1 partition to combine
     raft::spatial::knn::knn_merge_parts(merge_buffer_dists,
@@ -331,7 +338,7 @@ class sparse_knn_t {
                                         query_batcher.batch_rows(),
                                         2,
                                         k,
-                                        handle.get_stream(),
+                                        resource::get_cuda_stream(handle),
                                         trans.data());
   }
 
@@ -365,7 +372,7 @@ class sparse_knn_t {
                                  out_indices,
                                  ascending,
                                  n_neighbors,
-                                 handle.get_stream());
+                                 resource::get_cuda_stream(handle));
   }
 
   void compute_distances(csr_batcher_t<value_idx, value_t>& idx_batcher,
@@ -420,7 +427,7 @@ class sparse_knn_t {
 
   int n_idx_rows, n_idx_cols, n_query_rows, n_query_cols, k;
 
-  raft::device_resources const& handle;
+  raft::resources const& handle;
 };
 
 };  // namespace raft::sparse::neighbors::detail
diff --git a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh
index d53f2f8df3..61378d71d8 100644
--- a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh
+++ b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -94,7 +95,7 @@ void conv_indices(in_t* inds, out_t* out, size_t size, cudaStream_t stream)
  * @param c
  */
 template <typename value_idx = int, typename value_t = float>
-void knn_graph(raft::device_resources const& handle,
+void knn_graph(raft::resources const& handle,
                const value_t* X,
                size_t m,
                size_t n,
@@ -104,7 +105,7 @@ void knn_graph(raft::device_resources const& handle,
 {
   size_t k = build_k(m, c);
 
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   size_t nnz = m * k;
 
diff --git a/cpp/include/raft/sparse/neighbors/knn.cuh b/cpp/include/raft/sparse/neighbors/knn.cuh
index 1e8ce48e16..9dea2f5d52 100644
--- a/cpp/include/raft/sparse/neighbors/knn.cuh
+++ b/cpp/include/raft/sparse/neighbors/knn.cuh
@@ -28,6 +28,7 @@
                   " is deprecated and will be removed in a future release." \
                   " Please use the sparse/spatial version instead.")
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/sparse/neighbors/brute_force.cuh>
 
 namespace raft::sparse::neighbors {
@@ -50,7 +51,7 @@ namespace raft::sparse::neighbors {
  * @param[out] output_indices dense matrix for output indices (size n_query_rows * k)
  * @param[out] output_dists dense matrix for output distances (size n_query_rows * k)
  * @param[in] k the number of neighbors to query
- * @param[in] handle CUDA handle.get_stream() to order operations with respect to
+ * @param[in] handle raft resources handle whose CUDA stream is used to order operations
  * @param[in] batch_size_index maximum number of rows to use from index matrix per batch
  * @param[in] batch_size_query maximum number of rows to use from query matrix per batch
  * @param[in] metric distance metric/measure to use
@@ -72,7 +73,7 @@ void brute_force_knn(const value_idx* idxIndptr,
                      value_idx* output_indices,
                      value_t* output_dists,
                      int k,
-                     raft::device_resources const& handle,
+                     raft::resources const& handle,
                      size_t batch_size_index             = 2 << 14,  // approx 1M
                      size_t batch_size_query             = 2 << 14,
                      raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded,
diff --git a/cpp/include/raft/sparse/neighbors/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/knn_graph.cuh
index dab4b53482..8257afc16f 100644
--- a/cpp/include/raft/sparse/neighbors/knn_graph.cuh
+++ b/cpp/include/raft/sparse/neighbors/knn_graph.cuh
@@ -41,7 +41,7 @@ namespace raft::sparse::neighbors {
  * @param c
  */
 template <typename value_idx = int, typename value_t = float>
-void knn_graph(raft::device_resources const& handle,
+void knn_graph(raft::resources const& handle,
                const value_t* X,
                std::size_t m,
                std::size_t n,
diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh
index 8cdfa49c45..658b63729b 100644
--- a/cpp/include/raft/sparse/op/detail/reduce.cuh
+++ b/cpp/include/raft/sparse/op/detail/reduce.cuh
@@ -17,6 +17,8 @@
 #pragma once
 
 #include <cusparse_v2.h>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cuda_utils.cuh>
@@ -124,7 +126,7 @@ void compute_duplicates_mask(
  * @param[in] stream cuda ops will be ordered wrt this stream
  */
 template <typename value_idx, typename value_t>
-void max_duplicates(raft::device_resources const& handle,
+void max_duplicates(raft::resources const& handle,
                     raft::sparse::COO<value_t, value_idx>& out,
                     const value_idx* rows,
                     const value_idx* cols,
@@ -133,8 +135,8 @@ void max_duplicates(raft::device_resources const& handle,
                     size_t m,
                     size_t n)
 {
-  auto stream        = handle.get_stream();
-  auto thrust_policy = handle.get_thrust_policy();
+  auto stream        = resource::get_cuda_stream(handle);
+  auto thrust_policy = resource::get_thrust_policy(handle);
 
   // compute diffs & take exclusive scan
   rmm::device_uvector<value_idx> diff(nnz + 1, stream);
@@ -146,7 +148,7 @@ void max_duplicates(raft::device_resources const& handle,
   // compute final size
   value_idx size = 0;
   raft::update_host(&size, diff.data() + (diff.size() - 1), 1, stream);
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
   size++;
 
   out.allocate(size, m, n, true, stream);
diff --git a/cpp/include/raft/sparse/op/filter.cuh b/cpp/include/raft/sparse/op/filter.cuh
index 7418b26ec8..c64c05ae4e 100644
--- a/cpp/include/raft/sparse/op/filter.cuh
+++ b/cpp/include/raft/sparse/op/filter.cuh
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/op/detail/filter.cuh>
 
diff --git a/cpp/include/raft/sparse/op/reduce.cuh b/cpp/include/raft/sparse/op/reduce.cuh
index 5223100b2a..52f1d3b239 100644
--- a/cpp/include/raft/sparse/op/reduce.cuh
+++ b/cpp/include/raft/sparse/op/reduce.cuh
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/op/detail/reduce.cuh>
 
@@ -69,7 +69,7 @@ void compute_duplicates_mask(
  * @param[in] n number of columns in COO input matrix
  */
 template <typename value_idx, typename value_t>
-void max_duplicates(raft::device_resources const& handle,
+void max_duplicates(raft::resources const& handle,
                     raft::sparse::COO<value_t, value_idx>& out,
                     const value_idx* rows,
                     const value_idx* cols,
diff --git a/cpp/include/raft/sparse/op/row_op.cuh b/cpp/include/raft/sparse/op/row_op.cuh
index 17e3659355..a799093226 100644
--- a/cpp/include/raft/sparse/op/row_op.cuh
+++ b/cpp/include/raft/sparse/op/row_op.cuh
@@ -17,7 +17,7 @@
 #define __SPARSE_ROW_OP_H
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/op/detail/row_op.cuh>
 
 namespace raft {
diff --git a/cpp/include/raft/sparse/op/slice.cuh b/cpp/include/raft/sparse/op/slice.cuh
index 22d3f0168d..2da6dad4fc 100644
--- a/cpp/include/raft/sparse/op/slice.cuh
+++ b/cpp/include/raft/sparse/op/slice.cuh
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/op/detail/slice.cuh>
 
 namespace raft {
diff --git a/cpp/include/raft/sparse/op/sort.cuh b/cpp/include/raft/sparse/op/sort.cuh
index e4e69a93c7..c6c3c2e220 100644
--- a/cpp/include/raft/sparse/op/sort.cuh
+++ b/cpp/include/raft/sparse/op/sort.cuh
@@ -18,7 +18,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/op/detail/sort.h>
 
 namespace raft {
diff --git a/cpp/include/raft/sparse/solver/detail/lanczos.cuh b/cpp/include/raft/sparse/solver/detail/lanczos.cuh
index 67d6f6c412..74f8931a17 100644
--- a/cpp/include/raft/sparse/solver/detail/lanczos.cuh
+++ b/cpp/include/raft/sparse/solver/detail/lanczos.cuh
@@ -20,12 +20,14 @@
 #define _USE_MATH_DEFINES
 
 #include <cmath>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <vector>
 
 #include <cuda.h>
 #include <curand.h>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/spectral/detail/lapack.hpp>
 #include <raft/spectral/detail/warn_dbg.hpp>
@@ -80,7 +82,7 @@ inline curandStatus_t curandGenerateNormalX(
  *  @return Zero if successful. Otherwise non-zero.
  */
 template <typename index_type_t, typename value_type_t>
-int performLanczosIteration(raft::device_resources const& handle,
+int performLanczosIteration(raft::resources const& handle,
                             spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
                             index_type_t* iter,
                             index_type_t maxIter,
@@ -102,8 +104,8 @@ int performLanczosIteration(raft::device_resources const& handle,
   constexpr value_type_t zero   = 0;
   value_type_t alpha;
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   RAFT_EXPECTS(A != nullptr, "Null matrix pointer.");
 
@@ -270,7 +272,7 @@ int performLanczosIteration(raft::device_resources const& handle,
       cublas_h, n, &alpha, lanczosVecs_dev + IDX(0, *iter, n), 1, stream));
   }
 
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   return 0;
 }
@@ -541,7 +543,7 @@ static int francisQRIteration(index_type_t n,
  *  @return error flag.
  */
 template <typename index_type_t, typename value_type_t>
-static int lanczosRestart(raft::device_resources const& handle,
+static int lanczosRestart(raft::resources const& handle,
                           index_type_t n,
                           index_type_t iter,
                           index_type_t iter_new,
@@ -563,8 +565,8 @@ static int lanczosRestart(raft::device_resources const& handle,
   constexpr value_type_t zero = 0;
   constexpr value_type_t one  = 1;
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   // Loop index
   index_type_t i;
@@ -744,7 +746,7 @@ static int lanczosRestart(raft::device_resources const& handle,
  */
 template <typename index_type_t, typename value_type_t>
 int computeSmallestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
   index_type_t nEigVecs,
   index_type_t maxIter,
@@ -795,8 +797,8 @@ int computeSmallestEigenvectors(
   RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
   RAFT_EXPECTS(restartIter >= nEigVecs, "Invalid restartIter.");
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   // -------------------------------------------------------
   // Variable initialization
@@ -988,7 +990,7 @@ int computeSmallestEigenvectors(
 
 template <typename index_type_t, typename value_type_t>
 int computeSmallestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
   index_type_t nEigVecs,
   index_type_t maxIter,
@@ -1091,7 +1093,7 @@ int computeSmallestEigenvectors(
  */
 template <typename index_type_t, typename value_type_t>
 int computeLargestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const* A,
   index_type_t nEigVecs,
   index_type_t maxIter,
@@ -1142,8 +1144,8 @@ int computeLargestEigenvectors(
   RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
   RAFT_EXPECTS(restartIter >= nEigVecs, "Invalid restartIter.");
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   // -------------------------------------------------------
   // Variable initialization
@@ -1338,7 +1340,7 @@ int computeLargestEigenvectors(
 
 template <typename index_type_t, typename value_type_t>
 int computeLargestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
   index_type_t nEigVecs,
   index_type_t maxIter,
diff --git a/cpp/include/raft/sparse/solver/detail/mst_solver_inl.cuh b/cpp/include/raft/sparse/solver/detail/mst_solver_inl.cuh
index 3ed58ea4ef..0182668117 100644
--- a/cpp/include/raft/sparse/solver/detail/mst_solver_inl.cuh
+++ b/cpp/include/raft/sparse/solver/detail/mst_solver_inl.cuh
@@ -17,6 +17,8 @@
 #pragma once
 
 #include <curand.h>
+#include <raft/core/resource/device_properties.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <raft/sparse/solver/detail/mst_kernels.cuh>
 #include <raft/sparse/solver/detail/mst_utils.cuh>
@@ -60,18 +62,17 @@ inline curandStatus_t curand_generate_uniformX(curandGenerator_t generator,
 }
 
 template <typename vertex_t, typename edge_t, typename weight_t, typename alteration_t>
-MST_solver<vertex_t, edge_t, weight_t, alteration_t>::MST_solver(
-  raft::device_resources const& handle_,
-  const edge_t* offsets_,
-  const vertex_t* indices_,
-  const weight_t* weights_,
-  const vertex_t v_,
-  const edge_t e_,
-  vertex_t* color_,
-  cudaStream_t stream_,
-  bool symmetrize_output_,
-  bool initialize_colors_,
-  int iterations_)
+MST_solver<vertex_t, edge_t, weight_t, alteration_t>::MST_solver(raft::resources const& handle_,
+                                                                 const edge_t* offsets_,
+                                                                 const vertex_t* indices_,
+                                                                 const weight_t* weights_,
+                                                                 const vertex_t v_,
+                                                                 const edge_t e_,
+                                                                 vertex_t* color_,
+                                                                 cudaStream_t stream_,
+                                                                 bool symmetrize_output_,
+                                                                 bool initialize_colors_,
+                                                                 int iterations_)
   : handle(handle_),
     offsets(offsets_),
     indices(indices_),
@@ -95,16 +96,16 @@ MST_solver<vertex_t, edge_t, weight_t, alteration_t>::MST_solver(
     initialize_colors(initialize_colors_),
     iterations(iterations_)
 {
-  max_blocks  = handle_.get_device_properties().maxGridSize[0];
-  max_threads = handle_.get_device_properties().maxThreadsPerBlock;
-  sm_count    = handle_.get_device_properties().multiProcessorCount;
+  max_blocks  = resource::get_device_properties(handle_).maxGridSize[0];
+  max_threads = resource::get_device_properties(handle_).maxThreadsPerBlock;
+  sm_count    = resource::get_device_properties(handle_).multiProcessorCount;
 
   mst_edge_count.set_value_to_zero_async(stream);
   prev_mst_edge_count.set_value_to_zero_async(stream);
   RAFT_CUDA_TRY(cudaMemsetAsync(mst_edge.data(), 0, mst_edge.size() * sizeof(bool), stream));
 
   // Initially, color holds the vertex id as color
-  auto policy = handle.get_thrust_policy();
+  auto policy = resource::get_thrust_policy(handle);
   if (initialize_colors_) {
     thrust::sequence(policy, color.begin(), color.end(), 0);
     thrust::sequence(policy, color_index, color_index + v, 0);
@@ -194,7 +195,7 @@ struct alteration_functor {
 template <typename vertex_t, typename edge_t, typename weight_t, typename alteration_t>
 alteration_t MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration_max()
 {
-  auto policy = handle.get_thrust_policy();
+  auto policy = resource::get_thrust_policy(handle);
   rmm::device_uvector<weight_t> tmp(e, stream);
   thrust::device_ptr<const weight_t> weights_ptr(weights);
   thrust::copy(policy, weights_ptr, weights_ptr + e, tmp.begin());
@@ -284,7 +285,7 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::label_prop(vertex_t*
 template <typename vertex_t, typename edge_t, typename weight_t, typename alteration_t>
 void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::min_edge_per_vertex()
 {
-  auto policy = handle.get_thrust_policy();
+  auto policy = resource::get_thrust_policy(handle);
   thrust::fill(
     policy, min_edge_color.begin(), min_edge_color.end(), std::numeric_limits<alteration_t>::max());
   thrust::fill(
@@ -316,7 +317,7 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::min_edge_per_superver
   auto nthreads = std::min(v, max_threads);
   auto nblocks  = std::min((v + nthreads - 1) / nthreads, max_blocks);
 
-  auto policy = handle.get_thrust_policy();
+  auto policy = resource::get_thrust_policy(handle);
   thrust::fill(policy, temp_src.begin(), temp_src.end(), std::numeric_limits<vertex_t>::max());
 
   vertex_t* color_ptr               = color.data();
@@ -385,7 +386,7 @@ template <typename vertex_t, typename edge_t, typename weight_t, typename altera
 void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::append_src_dst_pair(
   vertex_t* mst_src, vertex_t* mst_dst, weight_t* mst_weights)
 {
-  auto policy = handle.get_thrust_policy();
+  auto policy = resource::get_thrust_policy(handle);
 
   edge_t curr_mst_edge_count = prev_mst_edge_count.value(stream);
 
diff --git a/cpp/include/raft/sparse/solver/lanczos.cuh b/cpp/include/raft/sparse/solver/lanczos.cuh
index cdfaaa97f2..1aa56d6ba2 100644
--- a/cpp/include/raft/sparse/solver/lanczos.cuh
+++ b/cpp/include/raft/sparse/solver/lanczos.cuh
@@ -66,7 +66,7 @@ namespace raft::sparse::solver {
  */
 template <typename index_type_t, typename value_type_t>
 int computeSmallestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
   index_type_t nEigVecs,
   index_type_t maxIter,
@@ -130,7 +130,7 @@ int computeSmallestEigenvectors(
  */
 template <typename index_type_t, typename value_type_t>
 int computeLargestEigenvectors(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::spectral::matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
   index_type_t nEigVecs,
   index_type_t maxIter,
diff --git a/cpp/include/raft/sparse/solver/mst.cuh b/cpp/include/raft/sparse/solver/mst.cuh
index 4f7600824a..08f364adc3 100644
--- a/cpp/include/raft/sparse/solver/mst.cuh
+++ b/cpp/include/raft/sparse/solver/mst.cuh
@@ -44,7 +44,7 @@ namespace raft::sparse::solver {
  * when an msf is encountered)
  */
 template <typename vertex_t, typename edge_t, typename weight_t, typename alteration_t = weight_t>
-Graph_COO<vertex_t, edge_t, weight_t> mst(raft::device_resources const& handle,
+Graph_COO<vertex_t, edge_t, weight_t> mst(raft::resources const& handle,
                                           edge_t const* offsets,
                                           vertex_t const* indices,
                                           weight_t const* weights,
diff --git a/cpp/include/raft/sparse/solver/mst_solver.cuh b/cpp/include/raft/sparse/solver/mst_solver.cuh
index 063f215fc8..bfedb9ce2a 100644
--- a/cpp/include/raft/sparse/solver/mst_solver.cuh
+++ b/cpp/include/raft/sparse/solver/mst_solver.cuh
@@ -17,7 +17,7 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -39,7 +39,7 @@ struct Graph_COO {
 template <typename vertex_t, typename edge_t, typename weight_t, typename alteration_t>
 class MST_solver {
  public:
-  MST_solver(raft::device_resources const& handle_,
+  MST_solver(raft::resources const& handle_,
              const edge_t* offsets_,
              const vertex_t* indices_,
              const weight_t* weights_,
@@ -56,7 +56,7 @@ class MST_solver {
   ~MST_solver() {}
 
  private:
-  raft::device_resources const& handle;
+  raft::resources const& handle;
   cudaStream_t stream;
   bool symmetrize_output, initialize_colors;
   int iterations;
diff --git a/cpp/include/raft/spatial/knn/ann.cuh b/cpp/include/raft/spatial/knn/ann.cuh
index 3d11ffbef4..cf46c42212 100644
--- a/cpp/include/raft/spatial/knn/ann.cuh
+++ b/cpp/include/raft/spatial/knn/ann.cuh
@@ -38,7 +38,7 @@ namespace raft::spatial::knn {
  */
 template <typename T = float, typename value_idx = int>
 [[deprecated("Consider using new-style raft::spatial::knn::*::build functions")]] inline void
-approx_knn_build_index(raft::device_resources& handle,
+approx_knn_build_index(raft::resources& handle,
                        raft::spatial::knn::knnIndex* index,
                        knnIndexParam* params,
                        raft::distance::DistanceType metric,
@@ -67,7 +67,7 @@ approx_knn_build_index(raft::device_resources& handle,
  */
 template <typename T = float, typename value_idx = int>
 [[deprecated("Consider using new-style raft::spatial::knn::*::search functions")]] inline void
-approx_knn_search(raft::device_resources& handle,
+approx_knn_search(raft::resources& handle,
                   float* distances,
                   int64_t* indices,
                   raft::spatial::knn::knnIndex* index,
diff --git a/cpp/include/raft/spatial/knn/ball_cover.cuh b/cpp/include/raft/spatial/knn/ball_cover.cuh
index f3b1123fa2..f8c3fbd3c0 100644
--- a/cpp/include/raft/spatial/knn/ball_cover.cuh
+++ b/cpp/include/raft/spatial/knn/ball_cover.cuh
@@ -34,14 +34,14 @@
 namespace raft::spatial::knn {
 
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void rbc_build_index(raft::device_resources const& handle,
+void rbc_build_index(raft::resources const& handle,
                      BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index)
 {
   raft::neighbors::ball_cover::build_index(handle, index);
 }
 
 template <typename idx_t, typename value_t, typename int_t, typename matrix_idx_t>
-void rbc_all_knn_query(raft::device_resources const& handle,
+void rbc_all_knn_query(raft::resources const& handle,
                        BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,
                        int_t k,
                        idx_t* inds,
@@ -54,7 +54,7 @@ void rbc_all_knn_query(raft::device_resources const& handle,
 }
 
 template <typename idx_t, typename value_t, typename int_t>
-void rbc_knn_query(raft::device_resources const& handle,
+void rbc_knn_query(raft::resources const& handle,
                    const BallCoverIndex<idx_t, value_t, int_t>& index,
                    int_t k,
                    const value_t* query,
diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh
index cc95b32cee..9f0af8c29e 100644
--- a/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh
@@ -18,6 +18,7 @@
 
 #include "../ann_common.h"
 #include "../ivf_flat.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include "processing.cuh"
 #include <raft/core/operators.hpp>
@@ -38,7 +39,7 @@
 namespace raft::spatial::knn::detail {
 
 template <typename T = float, typename IntType = int>
-void approx_knn_build_index(raft::device_resources const& handle,
+void approx_knn_build_index(raft::resources const& handle,
                             knnIndex* index,
                             knnIndexParam* params,
                             raft::distance::DistanceType metric,
@@ -47,7 +48,7 @@ void approx_knn_build_index(raft::device_resources const& handle,
                             IntType n,
                             IntType D)
 {
-  auto stream      = handle.get_stream();
+  auto stream      = resource::get_cuda_stream(handle);
   index->metric    = metric;
   index->metricArg = metricArg;
   if (dynamic_cast<const IVFParam*>(params)) {
@@ -92,7 +93,7 @@ void approx_knn_build_index(raft::device_resources const& handle,
 }
 
 template <typename T = float, typename IntType = int>
-void approx_knn_search(raft::device_resources const& handle,
+void approx_knn_search(raft::resources const& handle,
                        float* distances,
                        int64_t* indices,
                        knnIndex* index,
@@ -138,7 +139,7 @@ void approx_knn_search(raft::device_resources const& handle,
     float p = 0.5;  // standard l2
     if (index->metric == raft::distance::DistanceType::LpUnexpanded) p = 1.0 / index->metricArg;
     raft::linalg::unaryOp<float>(
-      distances, distances, n * k, raft::pow_const_op<float>(p), handle.get_stream());
+      distances, distances, n * k, raft::pow_const_op<float>(p), resource::get_cuda_stream(handle));
   }
   if constexpr (std::is_same_v<T, float>) { index->metric_processor->postprocess(distances); }
 }
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh
index a58847ee41..4fe60e304b 100644
--- a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh
@@ -16,7 +16,9 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 
 #include "../ball_cover_types.hpp"
 #include "ball_cover/common.cuh"
@@ -63,23 +65,27 @@ namespace detail {
  * @param index
  */
 template <typename value_idx, typename value_t, typename value_int = std::uint32_t>
-void sample_landmarks(raft::device_resources const& handle,
+void sample_landmarks(raft::resources const& handle,
                       BallCoverIndex<value_idx, value_t, value_int>& index)
 {
-  rmm::device_uvector<value_idx> R_1nn_cols2(index.n_landmarks, handle.get_stream());
-  rmm::device_uvector<value_t> R_1nn_ones(index.m, handle.get_stream());
-  rmm::device_uvector<value_idx> R_indices(index.n_landmarks, handle.get_stream());
+  rmm::device_uvector<value_idx> R_1nn_cols2(index.n_landmarks, resource::get_cuda_stream(handle));
+  rmm::device_uvector<value_t> R_1nn_ones(index.m, resource::get_cuda_stream(handle));
+  rmm::device_uvector<value_idx> R_indices(index.n_landmarks, resource::get_cuda_stream(handle));
 
-  thrust::sequence(handle.get_thrust_policy(),
+  thrust::sequence(resource::get_thrust_policy(handle),
                    index.get_R_1nn_cols().data_handle(),
                    index.get_R_1nn_cols().data_handle() + index.m,
                    (value_idx)0);
 
-  thrust::fill(
-    handle.get_thrust_policy(), R_1nn_ones.data(), R_1nn_ones.data() + R_1nn_ones.size(), 1.0);
+  thrust::fill(resource::get_thrust_policy(handle),
+               R_1nn_ones.data(),
+               R_1nn_ones.data() + R_1nn_ones.size(),
+               1.0);
 
-  thrust::fill(
-    handle.get_thrust_policy(), R_indices.data(), R_indices.data() + R_indices.size(), 0.0);
+  thrust::fill(resource::get_thrust_policy(handle),
+               R_indices.data(),
+               R_indices.data() + R_indices.size(),
+               0.0);
 
   /**
    * 1. Randomly sample sqrt(n) points from X
@@ -117,15 +123,15 @@ void sample_landmarks(raft::device_resources const& handle,
  * @param index
  */
 template <typename value_idx, typename value_t, typename value_int = std::uint32_t>
-void construct_landmark_1nn(raft::device_resources const& handle,
+void construct_landmark_1nn(raft::resources const& handle,
                             const value_idx* R_knn_inds_ptr,
                             const value_t* R_knn_dists_ptr,
                             value_int k,
                             BallCoverIndex<value_idx, value_t, value_int>& index)
 {
-  rmm::device_uvector<value_idx> R_1nn_inds(index.m, handle.get_stream());
+  rmm::device_uvector<value_idx> R_1nn_inds(index.m, resource::get_cuda_stream(handle));
 
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                R_1nn_inds.data(),
                R_1nn_inds.data() + index.m,
                std::numeric_limits<value_idx>::max());
@@ -134,16 +140,17 @@ void construct_landmark_1nn(raft::device_resources const& handle,
   value_t* R_1nn_dists_ptr  = index.get_R_1nn_dists().data_handle();
 
   auto idxs = thrust::make_counting_iterator<value_idx>(0);
-  thrust::for_each(handle.get_thrust_policy(), idxs, idxs + index.m, [=] __device__(value_idx i) {
-    R_1nn_inds_ptr[i]  = R_knn_inds_ptr[i * k];
-    R_1nn_dists_ptr[i] = R_knn_dists_ptr[i * k];
-  });
+  thrust::for_each(
+    resource::get_thrust_policy(handle), idxs, idxs + index.m, [=] __device__(value_idx i) {
+      R_1nn_inds_ptr[i]  = R_knn_inds_ptr[i * k];
+      R_1nn_dists_ptr[i] = R_knn_dists_ptr[i * k];
+    });
 
   auto keys = thrust::make_zip_iterator(
     thrust::make_tuple(R_1nn_inds.data(), index.get_R_1nn_dists().data_handle()));
 
   // group neighborhoods for each reference landmark and sort each group by distance
-  thrust::sort_by_key(handle.get_thrust_policy(),
+  thrust::sort_by_key(resource::get_thrust_policy(handle),
                       keys,
                       keys + index.m,
                       index.get_R_1nn_cols().data_handle(),
@@ -154,7 +161,7 @@ void construct_landmark_1nn(raft::device_resources const& handle,
                                            index.m,
                                            index.get_R_indptr().data_handle(),
                                            index.n_landmarks + 1,
-                                           handle.get_stream());
+                                           resource::get_cuda_stream(handle));
 }
 
 /**
@@ -171,7 +178,7 @@ void construct_landmark_1nn(raft::device_resources const& handle,
  * @param R_knn_dists
  */
 template <typename value_idx, typename value_t, typename value_int = std::uint32_t>
-void k_closest_landmarks(raft::device_resources const& handle,
+void k_closest_landmarks(raft::resources const& handle,
                          const BallCoverIndex<value_idx, value_t, value_int>& index,
                          const value_t* query_pts,
                          value_int n_query_pts,
@@ -199,7 +206,7 @@ void k_closest_landmarks(raft::device_resources const& handle,
  * @param index
  */
 template <typename value_idx, typename value_t, typename value_int = std::uint32_t>
-void compute_landmark_radii(raft::device_resources const& handle,
+void compute_landmark_radii(raft::resources const& handle,
                             BallCoverIndex<value_idx, value_t, value_int>& index)
 {
   auto entries = thrust::make_counting_iterator<value_idx>(0);
@@ -207,7 +214,7 @@ void compute_landmark_radii(raft::device_resources const& handle,
   const value_idx* R_indptr_ptr  = index.get_R_indptr().data_handle();
   const value_t* R_1nn_dists_ptr = index.get_R_1nn_dists().data_handle();
   value_t* R_radius_ptr          = index.get_R_radius().data_handle();
-  thrust::for_each(handle.get_thrust_policy(),
+  thrust::for_each(resource::get_thrust_policy(handle),
                    entries,
                    entries + index.n_landmarks,
                    [=] __device__(value_idx input) {
@@ -230,7 +237,7 @@ template <typename value_idx,
           typename value_t,
           typename value_int = std::uint32_t,
           typename dist_func>
-void perform_rbc_query(raft::device_resources const& handle,
+void perform_rbc_query(raft::resources const& handle,
                        const BallCoverIndex<value_idx, value_t, value_int>& index,
                        const value_t* query,
                        value_int n_query_pts,
@@ -246,11 +253,11 @@ void perform_rbc_query(raft::device_resources const& handle,
                        bool perform_post_filtering = true)
 {
   // initialize output inds and dists
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                inds,
                inds + (k * n_query_pts),
                std::numeric_limits<value_idx>::max());
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                dists,
                dists + (k * n_query_pts),
                std::numeric_limits<value_t>::max());
@@ -332,21 +339,21 @@ template <typename value_idx = std::int64_t,
           typename value_t,
           typename value_int = std::uint32_t,
           typename distance_func>
-void rbc_build_index(raft::device_resources const& handle,
+void rbc_build_index(raft::resources const& handle,
                      BallCoverIndex<value_idx, value_t, value_int>& index,
                      distance_func dfunc)
 {
   ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation");
   ASSERT(!index.is_index_trained(), "index cannot be previously trained");
 
-  rmm::device_uvector<value_idx> R_knn_inds(index.m, handle.get_stream());
+  rmm::device_uvector<value_idx> R_knn_inds(index.m, resource::get_cuda_stream(handle));
 
   // Initialize the uvectors
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                R_knn_inds.begin(),
                R_knn_inds.end(),
                std::numeric_limits<value_idx>::max());
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                index.get_R_closest_landmark_dists().data_handle(),
                index.get_R_closest_landmark_dists().data_handle() + index.m,
                std::numeric_limits<value_t>::max());
@@ -391,7 +398,7 @@ template <typename value_idx = std::int64_t,
           typename value_t,
           typename value_int = std::uint32_t,
           typename distance_func>
-void rbc_all_knn_query(raft::device_resources const& handle,
+void rbc_all_knn_query(raft::resources const& handle,
                        BallCoverIndex<value_idx, value_t, value_int>& index,
                        value_int k,
                        value_idx* inds,
@@ -405,27 +412,31 @@ void rbc_all_knn_query(raft::device_resources const& handle,
   ASSERT(index.n_landmarks >= k, "number of landmark samples must be >= k");
   ASSERT(!index.is_index_trained(), "index cannot be previously trained");
 
-  rmm::device_uvector<value_idx> R_knn_inds(k * index.m, handle.get_stream());
-  rmm::device_uvector<value_t> R_knn_dists(k * index.m, handle.get_stream());
+  rmm::device_uvector<value_idx> R_knn_inds(k * index.m, resource::get_cuda_stream(handle));
+  rmm::device_uvector<value_t> R_knn_dists(k * index.m, resource::get_cuda_stream(handle));
 
   // Initialize the uvectors
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                R_knn_inds.begin(),
                R_knn_inds.end(),
                std::numeric_limits<value_idx>::max());
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                R_knn_dists.begin(),
                R_knn_dists.end(),
                std::numeric_limits<value_t>::max());
 
-  thrust::fill(
-    handle.get_thrust_policy(), inds, inds + (k * index.m), std::numeric_limits<value_idx>::max());
-  thrust::fill(
-    handle.get_thrust_policy(), dists, dists + (k * index.m), std::numeric_limits<value_t>::max());
+  thrust::fill(resource::get_thrust_policy(handle),
+               inds,
+               inds + (k * index.m),
+               std::numeric_limits<value_idx>::max());
+  thrust::fill(resource::get_thrust_policy(handle),
+               dists,
+               dists + (k * index.m),
+               std::numeric_limits<value_t>::max());
 
   // For debugging / verification. Remove before releasing
-  rmm::device_uvector<value_int> dists_counter(index.m, handle.get_stream());
-  rmm::device_uvector<value_int> post_dists_counter(index.m, handle.get_stream());
+  rmm::device_uvector<value_int> dists_counter(index.m, resource::get_cuda_stream(handle));
+  rmm::device_uvector<value_int> post_dists_counter(index.m, resource::get_cuda_stream(handle));
 
   sample_landmarks<value_idx, value_t>(handle, index);
 
@@ -460,7 +471,7 @@ template <typename value_idx = std::int64_t,
           typename value_t,
           typename value_int = std::uint32_t,
           typename distance_func>
-void rbc_knn_query(raft::device_resources const& handle,
+void rbc_knn_query(raft::resources const& handle,
                    const BallCoverIndex<value_idx, value_t, value_int>& index,
                    value_int k,
                    const value_t* query,
@@ -476,24 +487,24 @@ void rbc_knn_query(raft::device_resources const& handle,
   ASSERT(index.n_landmarks >= k, "number of landmark samples must be >= k");
   ASSERT(index.is_index_trained(), "index must be previously trained");
 
-  rmm::device_uvector<value_idx> R_knn_inds(k * n_query_pts, handle.get_stream());
-  rmm::device_uvector<value_t> R_knn_dists(k * n_query_pts, handle.get_stream());
+  rmm::device_uvector<value_idx> R_knn_inds(k * n_query_pts, resource::get_cuda_stream(handle));
+  rmm::device_uvector<value_t> R_knn_dists(k * n_query_pts, resource::get_cuda_stream(handle));
 
   // Initialize the uvectors
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                R_knn_inds.begin(),
                R_knn_inds.end(),
                std::numeric_limits<value_idx>::max());
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                R_knn_dists.begin(),
                R_knn_dists.end(),
                std::numeric_limits<value_t>::max());
 
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                inds,
                inds + (k * n_query_pts),
                std::numeric_limits<value_idx>::max());
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                dists,
                dists + (k * n_query_pts),
                std::numeric_limits<value_t>::max());
@@ -501,13 +512,13 @@ void rbc_knn_query(raft::device_resources const& handle,
   k_closest_landmarks(handle, index, query, n_query_pts, k, R_knn_inds.data(), R_knn_dists.data());
 
   // For debugging / verification. Remove before releasing
-  rmm::device_uvector<value_int> dists_counter(index.m, handle.get_stream());
-  rmm::device_uvector<value_int> post_dists_counter(index.m, handle.get_stream());
-  thrust::fill(handle.get_thrust_policy(),
+  rmm::device_uvector<value_int> dists_counter(index.m, resource::get_cuda_stream(handle));
+  rmm::device_uvector<value_int> post_dists_counter(index.m, resource::get_cuda_stream(handle));
+  thrust::fill(resource::get_thrust_policy(handle),
                post_dists_counter.data(),
                post_dists_counter.data() + post_dists_counter.size(),
                0);
-  thrust::fill(handle.get_thrust_policy(),
+  thrust::fill(resource::get_thrust_policy(handle),
                dists_counter.data(),
                dists_counter.data() + dists_counter.size(),
                0);
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh
index efe1a8a70b..95aeca64e5 100644
--- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh
@@ -30,7 +30,7 @@ template <typename value_idx,
           typename value_int = std::uint32_t,
           int dims           = 2,
           typename dist_func>
-void rbc_low_dim_pass_one(raft::device_resources const& handle,
+void rbc_low_dim_pass_one(raft::resources const& handle,
                           const BallCoverIndex<value_idx, value_t, value_int>& index,
                           const value_t* query,
                           const value_int n_query_rows,
@@ -48,7 +48,7 @@ template <typename value_idx,
           typename value_int = std::uint32_t,
           int dims           = 2,
           typename dist_func>
-void rbc_low_dim_pass_two(raft::device_resources const& handle,
+void rbc_low_dim_pass_two(raft::resources const& handle,
                           const BallCoverIndex<value_idx, value_t, value_int>& index,
                           const value_t* query,
                           const value_int n_query_rows,
@@ -69,7 +69,7 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   extern template void                                                                       \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
@@ -86,7 +86,7 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   extern template void                                                                       \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh
index e0e7d716ee..52267fd83e 100644
--- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh
@@ -17,6 +17,8 @@
 #pragma once
 
 #include "common.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include "../../ball_cover_types.hpp"
 #include "../haversine_distance.cuh"
@@ -457,7 +459,7 @@ template <typename value_idx,
           typename value_int = std::uint32_t,
           int dims           = 2,
           typename dist_func>
-void rbc_low_dim_pass_one(raft::device_resources const& handle,
+void rbc_low_dim_pass_one(raft::resources const& handle,
                           const BallCoverIndex<value_idx, value_t, value_int>& index,
                           const value_t* query,
                           const value_int n_query_rows,
@@ -472,116 +474,122 @@ void rbc_low_dim_pass_one(raft::device_resources const& handle,
 {
   if (k <= 32)
     block_rbc_kernel_registers<value_idx, value_t, 32, 2, 128, dims, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        R_knn_inds,
+        R_knn_dists,
+        index.m,
+        k,
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        dists_counter,
+        index.get_R_radius().data_handle(),
+        dfunc,
+        weight);
 
   else if (k <= 64)
     block_rbc_kernel_registers<value_idx, value_t, 64, 3, 128, 2, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        R_knn_inds,
+        R_knn_dists,
+        index.m,
+        k,
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        dists_counter,
+        index.get_R_radius().data_handle(),
+        dfunc,
+        weight);
   else if (k <= 128)
     block_rbc_kernel_registers<value_idx, value_t, 128, 3, 128, dims, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        R_knn_inds,
+        R_knn_dists,
+        index.m,
+        k,
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        dists_counter,
+        index.get_R_radius().data_handle(),
+        dfunc,
+        weight);
 
   else if (k <= 256)
     block_rbc_kernel_registers<value_idx, value_t, 256, 4, 128, dims, value_int>
-      <<<n_query_rows, 128, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                      query,
-                                                      index.n,
-                                                      R_knn_inds,
-                                                      R_knn_dists,
-                                                      index.m,
-                                                      k,
-                                                      index.get_R_indptr().data_handle(),
-                                                      index.get_R_1nn_cols().data_handle(),
-                                                      index.get_R_1nn_dists().data_handle(),
-                                                      inds,
-                                                      dists,
-                                                      dists_counter,
-                                                      index.get_R_radius().data_handle(),
-                                                      dfunc,
-                                                      weight);
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        R_knn_inds,
+        R_knn_dists,
+        index.m,
+        k,
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        dists_counter,
+        index.get_R_radius().data_handle(),
+        dfunc,
+        weight);
 
   else if (k <= 512)
     block_rbc_kernel_registers<value_idx, value_t, 512, 8, 64, dims, value_int>
-      <<<n_query_rows, 64, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                     query,
-                                                     index.n,
-                                                     R_knn_inds,
-                                                     R_knn_dists,
-                                                     index.m,
-                                                     k,
-                                                     index.get_R_indptr().data_handle(),
-                                                     index.get_R_1nn_cols().data_handle(),
-                                                     index.get_R_1nn_dists().data_handle(),
-                                                     inds,
-                                                     dists,
-                                                     dists_counter,
-                                                     index.get_R_radius().data_handle(),
-                                                     dfunc,
-                                                     weight);
+      <<<n_query_rows, 64, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        R_knn_inds,
+        R_knn_dists,
+        index.m,
+        k,
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        dists_counter,
+        index.get_R_radius().data_handle(),
+        dfunc,
+        weight);
 
   else if (k <= 1024)
     block_rbc_kernel_registers<value_idx, value_t, 1024, 8, 64, dims, value_int>
-      <<<n_query_rows, 64, 0, handle.get_stream()>>>(index.get_X().data_handle(),
-                                                     query,
-                                                     index.n,
-                                                     R_knn_inds,
-                                                     R_knn_dists,
-                                                     index.m,
-                                                     k,
-                                                     index.get_R_indptr().data_handle(),
-                                                     index.get_R_1nn_cols().data_handle(),
-                                                     index.get_R_1nn_dists().data_handle(),
-                                                     inds,
-                                                     dists,
-                                                     dists_counter,
-                                                     index.get_R_radius().data_handle(),
-                                                     dfunc,
-                                                     weight);
+      <<<n_query_rows, 64, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        R_knn_inds,
+        R_knn_dists,
+        index.m,
+        k,
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        dists_counter,
+        index.get_R_radius().data_handle(),
+        dfunc,
+        weight);
 }
 
 template <typename value_idx,
@@ -589,7 +597,7 @@ template <typename value_idx,
           typename value_int = std::uint32_t,
           int dims           = 2,
           typename dist_func>
-void rbc_low_dim_pass_two(raft::device_resources const& handle,
+void rbc_low_dim_pass_two(raft::resources const& handle,
                           const BallCoverIndex<value_idx, value_t, value_int>& index,
                           const value_t* query,
                           const value_int n_query_rows,
@@ -604,11 +612,13 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
 {
   const value_int bitset_size = ceil(index.n_landmarks / 32.0);
 
-  rmm::device_uvector<std::uint32_t> bitset(bitset_size * n_query_rows, handle.get_stream());
-  thrust::fill(handle.get_thrust_policy(), bitset.data(), bitset.data() + bitset.size(), 0);
+  rmm::device_uvector<std::uint32_t> bitset(bitset_size * n_query_rows,
+                                            resource::get_cuda_stream(handle));
+  thrust::fill(
+    resource::get_thrust_policy(handle), bitset.data(), bitset.data() + bitset.size(), 0);
 
   perform_post_filter_registers<value_idx, value_t, value_int, dims, 128>
-    <<<n_query_rows, 128, bitset_size * sizeof(std::uint32_t), handle.get_stream()>>>(
+    <<<n_query_rows, 128, bitset_size * sizeof(std::uint32_t), resource::get_cuda_stream(handle)>>>(
       query,
       index.n,
       R_knn_inds,
@@ -631,22 +641,23 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
                                   32,
                                   2,
                                   128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
+                                  dims>
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        bitset.data(),
+        bitset_size,
+        index.get_R_closest_landmark_dists().data_handle(),
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        index.n_landmarks,
+        k,
+        dfunc,
+        post_dists_counter);
   else if (k <= 64)
     compute_final_dists_registers<value_idx,
                                   value_t,
@@ -656,22 +667,23 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
                                   64,
                                   3,
                                   128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
+                                  dims>
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        bitset.data(),
+        bitset_size,
+        index.get_R_closest_landmark_dists().data_handle(),
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        index.n_landmarks,
+        k,
+        dfunc,
+        post_dists_counter);
   else if (k <= 128)
     compute_final_dists_registers<value_idx,
                                   value_t,
@@ -681,22 +693,23 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
                                   128,
                                   3,
                                   128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
+                                  dims>
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        bitset.data(),
+        bitset_size,
+        index.get_R_closest_landmark_dists().data_handle(),
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        index.n_landmarks,
+        k,
+        dfunc,
+        post_dists_counter);
   else if (k <= 256)
     compute_final_dists_registers<value_idx,
                                   value_t,
@@ -706,22 +719,23 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
                                   256,
                                   4,
                                   128,
-                                  dims><<<n_query_rows, 128, 0, handle.get_stream()>>>(
-      index.get_X().data_handle(),
-      query,
-      index.n,
-      bitset.data(),
-      bitset_size,
-      index.get_R_closest_landmark_dists().data_handle(),
-      index.get_R_indptr().data_handle(),
-      index.get_R_1nn_cols().data_handle(),
-      index.get_R_1nn_dists().data_handle(),
-      inds,
-      dists,
-      index.n_landmarks,
-      k,
-      dfunc,
-      post_dists_counter);
+                                  dims>
+      <<<n_query_rows, 128, 0, resource::get_cuda_stream(handle)>>>(
+        index.get_X().data_handle(),
+        query,
+        index.n,
+        bitset.data(),
+        bitset_size,
+        index.get_R_closest_landmark_dists().data_handle(),
+        index.get_R_indptr().data_handle(),
+        index.get_R_1nn_cols().data_handle(),
+        index.get_R_1nn_dists().data_handle(),
+        inds,
+        dists,
+        index.n_landmarks,
+        k,
+        dfunc,
+        post_dists_counter);
   else if (k <= 512)
     compute_final_dists_registers<value_idx,
                                   value_t,
@@ -731,7 +745,7 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
                                   512,
                                   8,
                                   64,
-                                  dims><<<n_query_rows, 64, 0, handle.get_stream()>>>(
+                                  dims><<<n_query_rows, 64, 0, resource::get_cuda_stream(handle)>>>(
       index.get_X().data_handle(),
       query,
       index.n,
@@ -756,7 +770,7 @@ void rbc_low_dim_pass_two(raft::device_resources const& handle,
                                   1024,
                                   8,
                                   64,
-                                  dims><<<n_query_rows, 64, 0, handle.get_stream()>>>(
+                                  dims><<<n_query_rows, 64, 0, resource::get_cuda_stream(handle)>>>(
       index.get_X().data_handle(),
       query,
       index.n,
diff --git a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
index 058e98da9f..34ef30ace3 100644
--- a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
+++ b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh
@@ -20,7 +20,7 @@
 #include <raft/util/cudart_utils.hpp>
 #include <raft/util/pow2_utils.cuh>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/detail/faiss_select/Select.cuh>
 
diff --git a/cpp/include/raft/spatial/knn/knn.cuh b/cpp/include/raft/spatial/knn/knn.cuh
index a7bbfd9500..7b088316a3 100644
--- a/cpp/include/raft/spatial/knn/knn.cuh
+++ b/cpp/include/raft/spatial/knn/knn.cuh
@@ -194,7 +194,7 @@ template <typename idx_t = int, typename value_t = float>
  *            as input vector.
  */
 template <typename idx_t = std::int64_t, typename value_t = float, typename value_int = int>
-void brute_force_knn(raft::device_resources const& handle,
+void brute_force_knn(raft::resources const& handle,
                      std::vector<value_t*>& input,
                      std::vector<value_int>& sizes,
                      value_int D,
diff --git a/cpp/include/raft/spectral/cluster_solvers.cuh b/cpp/include/raft/spectral/cluster_solvers.cuh
index 1cb7aefd13..ecd14ebf42 100644
--- a/cpp/include/raft/spectral/cluster_solvers.cuh
+++ b/cpp/include/raft/spectral/cluster_solvers.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/cluster/kmeans.cuh>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <utility>  // for std::pair
 
 namespace raft {
@@ -47,7 +48,7 @@ struct kmeans_solver_t {
   {
   }
 
-  std::pair<value_type_t, index_type_t> solve(raft::device_resources const& handle,
+  std::pair<value_type_t, index_type_t> solve(raft::resources const& handle,
                                               size_type_t n_obs_vecs,
                                               size_type_t dim,
                                               value_type_t const* __restrict__ obs,
@@ -68,8 +69,10 @@ struct kmeans_solver_t {
     auto centroids =
       raft::make_device_matrix<value_type_t, index_type_t>(handle, config_.n_clusters, dim);
     auto weight = raft::make_device_vector<value_type_t, index_type_t>(handle, n_obs_vecs);
-    thrust::fill(
-      handle.get_thrust_policy(), weight.data_handle(), weight.data_handle() + n_obs_vecs, 1);
+    thrust::fill(resource::get_thrust_policy(handle),
+                 weight.data_handle(),
+                 weight.data_handle() + n_obs_vecs,
+                 1);
 
     auto sw = std::make_optional((raft::device_vector_view<const value_type_t>)weight.view());
     raft::cluster::kmeans_fit_predict<value_type_t, index_type_t>(
diff --git a/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh b/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh
index 17dcf6b07c..008f1e5d13 100644
--- a/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh
+++ b/cpp/include/raft/spectral/cluster_solvers_deprecated.cuh
@@ -52,7 +52,7 @@ struct kmeans_solver_deprecated_t {
   {
   }
 
-  std::pair<value_type_t, index_type_t> solve(raft::device_resources const& handle,
+  std::pair<value_type_t, index_type_t> solve(raft::resources const& handle,
                                               size_type_t n_obs_vecs,
                                               size_type_t dim,
                                               value_type_t const* __restrict__ obs,
diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
index 7128bfae32..07ab1dbeba 100644
--- a/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
+++ b/cpp/include/raft/spectral/detail/matrix_wrappers.hpp
@@ -15,7 +15,11 @@
  */
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusparse_handle.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cudart_utils.hpp>
@@ -89,8 +93,9 @@ struct vector_view_t {
 template <typename value_type>
 class vector_t {
  public:
-  vector_t(device_resources const& raft_handle, size_type sz)
-    : buffer_(sz, raft_handle.get_stream()), thrust_policy(raft_handle.get_thrust_policy())
+  vector_t(resources const& raft_handle, size_type sz)
+    : buffer_(sz, resource::get_cuda_stream(raft_handle)),
+      thrust_policy(resource::get_thrust_policy(raft_handle))
   {
   }
 
@@ -128,7 +133,7 @@ class vector_t {
 
 template <typename index_type, typename value_type>
 struct sparse_matrix_t {
-  sparse_matrix_t(device_resources const& raft_handle,
+  sparse_matrix_t(resources const& raft_handle,
                   index_type const* row_offsets,
                   index_type const* col_indices,
                   value_type const* values,
@@ -145,7 +150,7 @@ struct sparse_matrix_t {
   {
   }
 
-  sparse_matrix_t(device_resources const& raft_handle,
+  sparse_matrix_t(resources const& raft_handle,
                   index_type const* row_offsets,
                   index_type const* col_indices,
                   value_type const* values,
@@ -162,7 +167,7 @@ struct sparse_matrix_t {
   }
 
   template <typename CSRView>
-  sparse_matrix_t(device_resources const& raft_handle, CSRView const& csr_view)
+  sparse_matrix_t(resources const& raft_handle, CSRView const& csr_view)
     : handle_(raft_handle),
       row_offsets_(csr_view.offsets),
       col_indices_(csr_view.indices),
@@ -194,8 +199,8 @@ struct sparse_matrix_t {
     RAFT_EXPECTS(x != nullptr, "Null x buffer.");
     RAFT_EXPECTS(y != nullptr, "Null y buffer.");
 
-    auto cusparse_h = handle_.get_cusparse_handle();
-    auto stream     = handle_.get_stream();
+    auto cusparse_h = resource::get_cusparse_handle(handle_);
+    auto stream     = resource::get_cuda_stream(handle_);
 
     cusparseOperation_t trans = transpose ? CUSPARSE_OPERATION_TRANSPOSE :  // transpose
                                   CUSPARSE_OPERATION_NON_TRANSPOSE;         // non-transpose
@@ -281,7 +286,7 @@ struct sparse_matrix_t {
 #endif
   }
 
-  device_resources const& get_handle(void) const { return handle_; }
+  resources const& get_handle(void) const { return handle_; }
 
 #if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP
   cusparseSpMVAlg_t translate_algorithm(sparse_mv_alg_t alg) const
@@ -297,7 +302,7 @@ struct sparse_matrix_t {
   // private: // maybe not, keep this ASAPBNS ("as simple as possible, but not simpler"); hence,
   // aggregate
 
-  raft::device_resources const& handle_;
+  raft::resources const& handle_;
   index_type const* row_offsets_;
   index_type const* col_indices_;
   value_type const* values_;
@@ -308,7 +313,7 @@ struct sparse_matrix_t {
 
 template <typename index_type, typename value_type>
 struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
-  laplacian_matrix_t(device_resources const& raft_handle,
+  laplacian_matrix_t(resources const& raft_handle,
                      index_type const* row_offsets,
                      index_type const* col_indices,
                      value_type const* values,
@@ -323,7 +328,7 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
     sparse_matrix_t<index_type, value_type>::mv(1, ones.raw(), 0, diagonal_.raw());
   }
 
-  laplacian_matrix_t(device_resources const& raft_handle,
+  laplacian_matrix_t(resources const& raft_handle,
                      sparse_matrix_t<index_type, value_type> const& csr_m)
     : sparse_matrix_t<index_type, value_type>(raft_handle,
                                               csr_m.row_offsets_,
@@ -351,8 +356,9 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
     constexpr int BLOCK_SIZE = 1024;
     auto n                   = sparse_matrix_t<index_type, value_type>::nrows_;
 
-    auto cublas_h = sparse_matrix_t<index_type, value_type>::get_handle().get_cublas_handle();
-    auto stream   = sparse_matrix_t<index_type, value_type>::get_handle().get_stream();
+    auto const& handle = sparse_matrix_t<index_type, value_type>::get_handle();  // no copy
+    auto cublas_h      = resource::get_cublas_handle(handle);
+    auto stream        = resource::get_cuda_stream(handle);
 
     // scales y by beta:
     //
@@ -381,7 +387,7 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
 
 template <typename index_type, typename value_type>
 struct modularity_matrix_t : laplacian_matrix_t<index_type, value_type> {
-  modularity_matrix_t(device_resources const& raft_handle,
+  modularity_matrix_t(resources const& raft_handle,
                       index_type const* row_offsets,
                       index_type const* col_indices,
                       value_type const* values,
@@ -393,7 +399,7 @@ struct modularity_matrix_t : laplacian_matrix_t<index_type, value_type> {
     edge_sum_ = laplacian_matrix_t<index_type, value_type>::diagonal_.nrm1();
   }
 
-  modularity_matrix_t(device_resources const& raft_handle,
+  modularity_matrix_t(resources const& raft_handle,
                       sparse_matrix_t<index_type, value_type> const& csr_m)
     : laplacian_matrix_t<index_type, value_type>(raft_handle, csr_m)
   {
@@ -412,8 +418,9 @@ struct modularity_matrix_t : laplacian_matrix_t<index_type, value_type> {
   {
     auto n = sparse_matrix_t<index_type, value_type>::nrows_;
 
-    auto cublas_h = sparse_matrix_t<index_type, value_type>::get_handle().get_cublas_handle();
-    auto stream   = sparse_matrix_t<index_type, value_type>::get_handle().get_stream();
+    auto const& handle = sparse_matrix_t<index_type, value_type>::get_handle();  // no copy
+    auto cublas_h      = resource::get_cublas_handle(handle);
+    auto stream        = resource::get_cuda_stream(handle);
 
     // y = A*x
     //
diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp
index d81c64b257..f60fbf863e 100644
--- a/cpp/include/raft/spectral/detail/modularity_maximization.hpp
+++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp
@@ -17,6 +17,8 @@
 #pragma once
 
 #include <math.h>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <stdio.h>
 
 #include <cuda.h>
@@ -63,7 +65,7 @@ namespace detail {
  */
 template <typename vertex_t, typename weight_t, typename EigenSolver, typename ClusterSolver>
 std::tuple<vertex_t, weight_t, vertex_t> modularity_maximization(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::spectral::matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
   EigenSolver const& eigen_solver,
   ClusterSolver const& cluster_solver,
@@ -75,8 +77,8 @@ std::tuple<vertex_t, weight_t, vertex_t> modularity_maximization(
   RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
   RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
 
-  auto stream   = handle.get_stream();
-  auto cublas_h = handle.get_cublas_handle();
+  auto stream   = resource::get_cuda_stream(handle);
+  auto cublas_h = resource::get_cublas_handle(handle);
 
   std::tuple<vertex_t, weight_t, vertex_t>
     stats;  // # iters eigen solver, cluster solver residual, # iters cluster solver
@@ -122,7 +124,7 @@ std::tuple<vertex_t, weight_t, vertex_t> modularity_maximization(
  *  @param modularity On exit, modularity
  */
 template <typename vertex_t, typename weight_t>
-void analyzeModularity(raft::device_resources const& handle,
+void analyzeModularity(raft::resources const& handle,
                        raft::spectral::matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
                        vertex_t nClusters,
                        vertex_t const* __restrict__ clusters,
@@ -134,8 +136,8 @@ void analyzeModularity(raft::device_resources const& handle,
   vertex_t n = csr_m.nrows_;
   weight_t partModularity, clustersize;
 
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
+  auto cublas_h = resource::get_cublas_handle(handle);
+  auto stream   = resource::get_cuda_stream(handle);
 
   // Device memory
   raft::spectral::matrix::vector_t<weight_t> part_i(handle, n);
diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp
index 6750f5d93f..ed59ad3149 100644
--- a/cpp/include/raft/spectral/detail/partition.hpp
+++ b/cpp/include/raft/spectral/detail/partition.hpp
@@ -16,6 +16,8 @@
 #pragma once
 
 #include <math.h>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <stdio.h>
 
 #include <cuda.h>
@@ -63,7 +65,7 @@ namespace detail {
  */
 template <typename vertex_t, typename weight_t, typename EigenSolver, typename ClusterSolver>
 std::tuple<vertex_t, weight_t, vertex_t> partition(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   spectral::matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
   EigenSolver const& eigen_solver,
   ClusterSolver const& cluster_solver,
@@ -75,8 +77,8 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
   RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
   RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
 
-  auto stream   = handle.get_stream();
-  auto cublas_h = handle.get_cublas_handle();
+  auto stream   = resource::get_cuda_stream(handle);
+  auto cublas_h = resource::get_cublas_handle(handle);
 
   std::tuple<vertex_t, weight_t, vertex_t>
     stats;  //{iters_eig_solver,residual_cluster,iters_cluster_solver} // # iters eigen solver,
@@ -131,7 +133,7 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
  *  @return error flag.
  */
 template <typename vertex_t, typename weight_t>
-void analyzePartition(raft::device_resources const& handle,
+void analyzePartition(raft::resources const& handle,
                       spectral::matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
                       vertex_t nClusters,
                       const vertex_t* __restrict__ clusters,
@@ -143,8 +145,8 @@ void analyzePartition(raft::device_resources const& handle,
   vertex_t i;
   vertex_t n = csr_m.nrows_;
 
-  auto stream   = handle.get_stream();
-  auto cublas_h = handle.get_cublas_handle();
+  auto stream   = resource::get_cuda_stream(handle);
+  auto cublas_h = resource::get_cublas_handle(handle);
 
   weight_t partEdgesCut, clustersize;
 
diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh
index ae75031522..b95b21e079 100644
--- a/cpp/include/raft/spectral/detail/spectral_util.cuh
+++ b/cpp/include/raft/spectral/detail/spectral_util.cuh
@@ -16,7 +16,10 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/spectral/matrix_wrappers.hpp>
 #include <raft/util/cudart_utils.hpp>
@@ -116,14 +119,14 @@ cudaError_t scale_obs(index_type_t m, index_type_t n, value_type_t* obs)
 }
 
 template <typename vertex_t, typename edge_t, typename weight_t>
-void transform_eigen_matrix(raft::device_resources const& handle,
+void transform_eigen_matrix(raft::resources const& handle,
                             edge_t n,
                             vertex_t nEigVecs,
                             weight_t* eigVecs)
 {
-  auto stream             = handle.get_stream();
-  auto cublas_h           = handle.get_cublas_handle();
-  auto thrust_exec_policy = handle.get_thrust_policy();
+  auto stream             = resource::get_cuda_stream(handle);
+  auto cublas_h           = resource::get_cublas_handle(handle);
+  auto thrust_exec_policy = resource::get_thrust_policy(handle);
 
   const weight_t zero{0.0};
   const weight_t one{1.0};
@@ -210,7 +213,7 @@ struct equal_to_i_op {
 // Construct indicator vector for ith partition
 //
 template <typename vertex_t, typename edge_t, typename weight_t>
-bool construct_indicator(raft::device_resources const& handle,
+bool construct_indicator(raft::resources const& handle,
                          edge_t index,
                          edge_t n,
                          weight_t& clustersize,
@@ -220,9 +223,9 @@ bool construct_indicator(raft::device_resources const& handle,
                          raft::spectral::matrix::vector_t<weight_t>& Bx,
                          raft::spectral::matrix::laplacian_matrix_t<vertex_t, weight_t> const& B)
 {
-  auto stream             = handle.get_stream();
-  auto cublas_h           = handle.get_cublas_handle();
-  auto thrust_exec_policy = handle.get_thrust_policy();
+  auto stream             = resource::get_cuda_stream(handle);
+  auto cublas_h           = resource::get_cublas_handle(handle);
+  auto thrust_exec_policy = resource::get_thrust_policy(handle);
 
   thrust::for_each(
     thrust_exec_policy,
diff --git a/cpp/include/raft/spectral/eigen_solvers.cuh b/cpp/include/raft/spectral/eigen_solvers.cuh
index 3f6959d2e2..4774d8b8ae 100644
--- a/cpp/include/raft/spectral/eigen_solvers.cuh
+++ b/cpp/include/raft/spectral/eigen_solvers.cuh
@@ -50,7 +50,7 @@ struct lanczos_solver_t {
   }
 
   index_type_t solve_smallest_eigenvectors(
-    raft::device_resources const& handle,
+    raft::resources const& handle,
     matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
     value_type_t* __restrict__ eigVals,
     value_type_t* __restrict__ eigVecs) const
@@ -73,7 +73,7 @@ struct lanczos_solver_t {
   }
 
   index_type_t solve_largest_eigenvectors(
-    raft::device_resources const& handle,
+    raft::resources const& handle,
     matrix::sparse_matrix_t<index_type_t, value_type_t> const& A,
     value_type_t* __restrict__ eigVals,
     value_type_t* __restrict__ eigVecs) const
diff --git a/cpp/include/raft/spectral/modularity_maximization.cuh b/cpp/include/raft/spectral/modularity_maximization.cuh
index 29d432c042..7d4f42f952 100644
--- a/cpp/include/raft/spectral/modularity_maximization.cuh
+++ b/cpp/include/raft/spectral/modularity_maximization.cuh
@@ -45,7 +45,7 @@ namespace spectral {
  */
 template <typename vertex_t, typename weight_t, typename EigenSolver, typename ClusterSolver>
 std::tuple<vertex_t, weight_t, vertex_t> modularity_maximization(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
   EigenSolver const& eigen_solver,
   ClusterSolver const& cluster_solver,
@@ -70,7 +70,7 @@ std::tuple<vertex_t, weight_t, vertex_t> modularity_maximization(
  *  @param modularity On exit, modularity
  */
 template <typename vertex_t, typename weight_t>
-void analyzeModularity(raft::device_resources const& handle,
+void analyzeModularity(raft::resources const& handle,
                        matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
                        vertex_t nClusters,
                        vertex_t const* __restrict__ clusters,
diff --git a/cpp/include/raft/spectral/partition.cuh b/cpp/include/raft/spectral/partition.cuh
index 0dec230752..b7289aae3d 100644
--- a/cpp/include/raft/spectral/partition.cuh
+++ b/cpp/include/raft/spectral/partition.cuh
@@ -47,7 +47,7 @@ namespace spectral {
  */
 template <typename vertex_t, typename weight_t, typename EigenSolver, typename ClusterSolver>
 std::tuple<vertex_t, weight_t, vertex_t> partition(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
   EigenSolver const& eigen_solver,
   ClusterSolver const& cluster_solver,
@@ -78,7 +78,7 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(
  *  @param cost On exit, partition cost function.
  */
 template <typename vertex_t, typename weight_t>
-void analyzePartition(raft::device_resources const& handle,
+void analyzePartition(raft::resources const& handle,
                       matrix::sparse_matrix_t<vertex_t, weight_t> const& csr_m,
                       vertex_t nClusters,
                       const vertex_t* __restrict__ clusters,
diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh
index 7a5780fbc9..6625d38a7a 100644
--- a/cpp/include/raft/stats/accuracy.cuh
+++ b/cpp/include/raft/stats/accuracy.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/scores.cuh>
 
 namespace raft {
@@ -55,7 +56,7 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n,
  * @return: Accuracy score in [0, 1]; higher is better.
  */
 template <typename value_t, typename idx_t>
-float accuracy(raft::device_resources const& handle,
+float accuracy(raft::resources const& handle,
                raft::device_vector_view<const value_t, idx_t> predictions,
                raft::device_vector_view<const value_t, idx_t> ref_predictions)
 {
@@ -66,7 +67,7 @@ float accuracy(raft::device_resources const& handle,
   return detail::accuracy_score(predictions.data_handle(),
                                 ref_predictions.data_handle(),
                                 predictions.extent(0),
-                                handle.get_stream());
+                                resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_accuracy
diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh
index 2500a48bfb..7daa1792b1 100644
--- a/cpp/include/raft/stats/adjusted_rand_index.cuh
+++ b/cpp/include/raft/stats/adjusted_rand_index.cuh
@@ -25,6 +25,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/adjusted_rand_index.cuh>
 
 namespace raft {
@@ -66,7 +67,7 @@ double adjusted_rand_index(const T* firstClusterArray,
  * @return the Adjusted RandIndex
  */
 template <typename value_t, typename math_t, typename idx_t>
-double adjusted_rand_index(raft::device_resources const& handle,
+double adjusted_rand_index(raft::resources const& handle,
                            raft::device_vector_view<const value_t, idx_t> first_cluster_array,
                            raft::device_vector_view<const value_t, idx_t> second_cluster_array)
 {
@@ -77,7 +78,7 @@ double adjusted_rand_index(raft::device_resources const& handle,
   return detail::compute_adjusted_rand_index<value_t, math_t>(first_cluster_array.data_handle(),
                                                               second_cluster_array.data_handle(),
                                                               first_cluster_array.extent(0),
-                                                              handle.get_stream());
+                                                              resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_adj_rand_index
diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh
index 10e1753423..07fd61411d 100644
--- a/cpp/include/raft/stats/completeness_score.cuh
+++ b/cpp/include/raft/stats/completeness_score.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/homogeneity_score.cuh>
 
 namespace raft {
@@ -65,7 +66,7 @@ double completeness_score(const T* truthClusterArray,
  * @return the cluster completeness score
  */
 template <typename value_t, typename idx_t>
-double completeness_score(raft::device_resources const& handle,
+double completeness_score(raft::resources const& handle,
                           raft::device_vector_view<const value_t, idx_t> truth_cluster_array,
                           raft::device_vector_view<const value_t, idx_t> pred_cluster_array,
                           value_t lower_label_range,
@@ -79,7 +80,7 @@ double completeness_score(raft::device_resources const& handle,
                                    truth_cluster_array.extent(0),
                                    lower_label_range,
                                    upper_label_range,
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_completeness
diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh
index e309e8b4b9..16f0998435 100644
--- a/cpp/include/raft/stats/contingency_matrix.cuh
+++ b/cpp/include/raft/stats/contingency_matrix.cuh
@@ -21,8 +21,9 @@
 
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/contingencyMatrix.cuh>
 
 namespace raft {
@@ -120,7 +121,7 @@ void contingencyMatrix(const T* groundTruth,
  * @param[out] maxLabel: calculated max value in input array
  */
 template <typename value_t, typename idx_t>
-void get_input_class_cardinality(raft::device_resources const& handle,
+void get_input_class_cardinality(raft::resources const& handle,
                                  raft::device_vector_view<const value_t, idx_t> groundTruth,
                                  raft::host_scalar_view<value_t> minLabel,
                                  raft::host_scalar_view<value_t> maxLabel)
@@ -129,7 +130,7 @@ void get_input_class_cardinality(raft::device_resources const& handle,
   RAFT_EXPECTS(maxLabel.data_handle() != nullptr, "Invalid maxLabel pointer");
   detail::getInputClassCardinality(groundTruth.data_handle(),
                                    groundTruth.extent(0),
-                                   handle.get_stream(),
+                                   resource::get_cuda_stream(handle),
                                    *minLabel.data_handle(),
                                    *maxLabel.data_handle());
 }
@@ -158,7 +159,7 @@ template <typename value_t,
           typename layout_t,
           typename opt_min_label_t,
           typename opt_max_label_t>
-void contingency_matrix(raft::device_resources const& handle,
+void contingency_matrix(raft::resources const& handle,
                         raft::device_vector_view<const value_t, idx_t> ground_truth,
                         raft::device_vector_view<const value_t, idx_t> predicted_label,
                         raft::device_matrix_view<out_t, idx_t, layout_t> out_mat,
@@ -180,7 +181,7 @@ void contingency_matrix(raft::device_resources const& handle,
 
   auto workspace_sz = detail::getContingencyMatrixWorkspaceSize(ground_truth.extent(0),
                                                                 ground_truth.data_handle(),
-                                                                handle.get_stream(),
+                                                                resource::get_cuda_stream(handle),
                                                                 min_label_value,
                                                                 max_label_value);
   auto workspace    = raft::make_device_vector<char>(handle, workspace_sz);
@@ -189,7 +190,7 @@ void contingency_matrix(raft::device_resources const& handle,
                                             predicted_label.data_handle(),
                                             ground_truth.extent(0),
                                             out_mat.data_handle(),
-                                            handle.get_stream(),
+                                            resource::get_cuda_stream(handle),
                                             workspace.data_handle(),
                                             workspace_sz,
                                             min_label_value,
diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh
index f58061ba72..67f44b0fde 100644
--- a/cpp/include/raft/stats/cov.cuh
+++ b/cpp/include/raft/stats/cov.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/cov.cuh>
 namespace raft {
 namespace stats {
@@ -45,7 +46,7 @@ namespace stats {
  * function returns!
  */
 template <typename Type>
-void cov(raft::device_resources const& handle,
+void cov(raft::resources const& handle,
          Type* covar,
          Type* data,
          const Type* mu,
@@ -85,7 +86,7 @@ void cov(raft::device_resources const& handle,
  * function returns!
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void cov(raft::device_resources const& handle,
+void cov(raft::resources const& handle,
          raft::device_matrix_view<value_t, idx_t, layout_t> data,
          raft::device_vector_view<const value_t, idx_t> mu,
          raft::device_matrix_view<value_t, idx_t, layout_t> covar,
@@ -110,7 +111,7 @@ void cov(raft::device_resources const& handle,
               std::is_same_v<layout_t, raft::row_major>,
               sample,
               stable,
-              handle.get_stream());
+              resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_cov
diff --git a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh
index a184fe22ef..17c7515fbb 100644
--- a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh
+++ b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh
@@ -17,6 +17,9 @@
 #pragma once
 
 #include "../silhouette_score.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/device_atomics.cuh>
 #include <rmm/device_uvector.hpp>
@@ -111,12 +114,12 @@ __global__ void compute_chunked_a_b_kernel(value_t* a,
 }
 
 template <typename value_idx, typename label_idx>
-rmm::device_uvector<value_idx> get_cluster_counts(raft::device_resources const& handle,
+rmm::device_uvector<value_idx> get_cluster_counts(raft::resources const& handle,
                                                   const label_idx* y,
                                                   value_idx& n_rows,
                                                   label_idx& n_labels)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
 
   rmm::device_uvector<value_idx> cluster_counts(n_labels, stream);
 
@@ -128,7 +131,7 @@ rmm::device_uvector<value_idx> get_cluster_counts(raft::device_resources const&
 }
 
 template <typename value_t, typename value_idx>
-rmm::device_uvector<value_t> get_pairwise_distance(raft::device_resources const& handle,
+rmm::device_uvector<value_t> get_pairwise_distance(raft::resources const& handle,
                                                    const value_t* left_begin,
                                                    const value_t* right_begin,
                                                    value_idx& n_left_rows,
@@ -146,7 +149,7 @@ rmm::device_uvector<value_t> get_pairwise_distance(raft::device_resources const&
 }
 
 template <typename value_t, typename value_idx, typename label_idx>
-void compute_chunked_a_b(raft::device_resources const& handle,
+void compute_chunked_a_b(raft::resources const& handle,
                          value_t* a,
                          value_t* b,
                          value_idx& row_offset,
@@ -169,7 +172,7 @@ void compute_chunked_a_b(raft::device_resources const& handle,
 
 template <typename value_t, typename value_idx, typename label_idx>
 value_t silhouette_score(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const value_t* X,
   value_idx n_rows,
   value_idx n_cols,
@@ -184,8 +187,8 @@ value_t silhouette_score(
 
   rmm::device_uvector<value_idx> cluster_counts = get_cluster_counts(handle, y, n_rows, n_labels);
 
-  auto stream = handle.get_stream();
-  auto policy = handle.get_thrust_policy();
+  auto stream = resource::get_cuda_stream(handle);
+  auto policy = resource::get_thrust_policy(handle);
 
   auto b_size = n_rows * n_labels;
 
@@ -211,7 +214,7 @@ value_t silhouette_score(
   detail::fill_b_kernel<<<grid_size, block_size, 0, stream>>>(
     b_ptr, y, n_rows, n_labels, cluster_counts.data());
 
-  handle.wait_stream_pool_on_stream();
+  resource::wait_stream_pool_on_stream(handle);
 
   auto n_iters = 0;
 
@@ -219,7 +222,7 @@ value_t silhouette_score(
     for (value_idx j = 0; j < n_rows; j += chunk) {
       ++n_iters;
 
-      auto chunk_stream = handle.get_next_usable_stream(i + chunk * j);
+      auto chunk_stream = resource::get_next_usable_stream(handle, i + chunk * j);
 
       const auto* left_begin  = X + (i * n_cols);
       const auto* right_begin = X + (j * n_cols);
@@ -245,7 +248,7 @@ value_t silhouette_score(
     }
   }
 
-  handle.sync_stream_pool();
+  resource::sync_stream_pool(handle);
 
   // calculating row-wise minimum in b
   // this prim only supports int indices for now
diff --git a/cpp/include/raft/stats/detail/cov.cuh b/cpp/include/raft/stats/detail/cov.cuh
index 0561ac269b..0f740c8ed9 100644
--- a/cpp/include/raft/stats/detail/cov.cuh
+++ b/cpp/include/raft/stats/detail/cov.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/resource/cublas_handle.hpp>
 #include <raft/linalg/gemm.cuh>
 #include <raft/stats/mean_center.cuh>
 
@@ -44,7 +45,7 @@ namespace detail {
  * function returns!
  */
 template <typename Type>
-void cov(raft::device_resources const& handle,
+void cov(raft::resources const& handle,
          Type* covar,
          Type* data,
          const Type* mu,
@@ -56,7 +57,7 @@ void cov(raft::device_resources const& handle,
          cudaStream_t stream)
 {
   if (stable) {
-    cublasHandle_t cublas_h = handle.get_cublas_handle();
+    cublasHandle_t cublas_h = resource::get_cublas_handle(handle);
 
     // since mean operation is assumed to be along a given column, broadcast
     // must be along rows!
diff --git a/cpp/include/raft/stats/detail/silhouette_score.cuh b/cpp/include/raft/stats/detail/silhouette_score.cuh
index f3839b99c8..1e9c7c677c 100644
--- a/cpp/include/raft/stats/detail/silhouette_score.cuh
+++ b/cpp/include/raft/stats/detail/silhouette_score.cuh
@@ -22,6 +22,7 @@
 #include <math.h>
 #include <numeric>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/add.cuh>
@@ -191,7 +192,7 @@ struct SilOp {
  */
 template <typename DataT, typename LabelT>
 DataT silhouette_score(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   const DataT* X_in,
   int nRows,
   int nCols,
@@ -307,7 +308,7 @@ DataT silhouette_score(
 
   DataT avgSilhouetteScore = d_avgSilhouetteScore.value(stream);
 
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   avgSilhouetteScore /= nRows;
 
diff --git a/cpp/include/raft/stats/detail/trustworthiness_score.cuh b/cpp/include/raft/stats/detail/trustworthiness_score.cuh
index 23f84754da..ebcc2e8655 100644
--- a/cpp/include/raft/stats/detail/trustworthiness_score.cuh
+++ b/cpp/include/raft/stats/detail/trustworthiness_score.cuh
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/matrix/col_wise_sort.cuh>
 #include <raft/spatial/knn/knn.cuh>
@@ -87,7 +88,7 @@ __global__ void compute_rank(double* rank,
  * @param[out] distances KNN distances
  */
 template <raft::distance::DistanceType distance_type, typename math_t>
-void run_knn(const raft::device_resources& h,
+void run_knn(const raft::resources& h,
              math_t* input,
              int n,
              int d,
@@ -128,7 +129,7 @@ void run_knn(const raft::device_resources& h,
  * @return Trustworthiness score
  */
 template <typename math_t, raft::distance::DistanceType distance_type>
-double trustworthiness_score(const raft::device_resources& h,
+double trustworthiness_score(const raft::resources& h,
                              const math_t* X,
                              math_t* X_embedded,
                              int n,
@@ -137,7 +138,7 @@ double trustworthiness_score(const raft::device_resources& h,
                              int n_neighbors,
                              int batchSize = 512)
 {
-  cudaStream_t stream = h.get_stream();
+  cudaStream_t stream = resource::get_cuda_stream(h);
 
   const int KNN_ALLOC = n * (n_neighbors + 1);
   rmm::device_uvector<int64_t> emb_ind(KNN_ALLOC, stream);
diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh
index 8600305d9e..216f637c94 100644
--- a/cpp/include/raft/stats/dispersion.cuh
+++ b/cpp/include/raft/stats/dispersion.cuh
@@ -21,6 +21,7 @@
 
 #include <optional>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/dispersion.cuh>
 
 namespace raft {
@@ -81,7 +82,7 @@ DataT dispersion(const DataT* centroids,
  */
 template <typename value_t, typename idx_t>
 value_t cluster_dispersion(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> centroids,
   raft::device_vector_view<const idx_t, idx_t> cluster_sizes,
   std::optional<raft::device_vector_view<value_t, idx_t>> global_centroid,
@@ -103,7 +104,7 @@ value_t cluster_dispersion(
                                             centroids.extent(0),
                                             n_points,
                                             centroids.extent(1),
-                                            handle.get_stream());
+                                            resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_cluster_dispersion
@@ -117,7 +118,7 @@ value_t cluster_dispersion(
  */
 template <typename value_t, typename idx_t>
 value_t cluster_dispersion(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> centroids,
   raft::device_vector_view<const idx_t, idx_t> cluster_sizes,
   std::nullopt_t global_centroid,
diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh
index 05f08f52a4..fcc49fefd2 100644
--- a/cpp/include/raft/stats/entropy.cuh
+++ b/cpp/include/raft/stats/entropy.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/entropy.cuh>
 
 namespace raft {
@@ -64,7 +65,7 @@ double entropy(const T* clusterArray,
  * @return the entropy score
  */
 template <typename value_t, typename idx_t>
-double entropy(raft::device_resources const& handle,
+double entropy(raft::resources const& handle,
                raft::device_vector_view<const value_t, idx_t> cluster_array,
                const value_t lower_label_range,
                const value_t upper_label_range)
@@ -74,7 +75,7 @@ double entropy(raft::device_resources const& handle,
                          cluster_array.extent(0),
                          lower_label_range,
                          upper_label_range,
-                         handle.get_stream());
+                         resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_entropy
diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh
index d97d0759a0..c374251359 100644
--- a/cpp/include/raft/stats/histogram.cuh
+++ b/cpp/include/raft/stats/histogram.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/histogram.cuh>
 #include <raft/stats/stats_types.hpp>
 
@@ -91,7 +92,7 @@ void histogram(HistType type,
  * @note signature of binner_op is `int func(value_t, IdxT);`
  */
 template <typename value_t, typename idx_t, typename binner_op = IdentityBinner<value_t, idx_t>>
-void histogram(raft::device_resources const& handle,
+void histogram(raft::resources const& handle,
                HistType type,
                raft::device_matrix_view<const value_t, idx_t, raft::col_major> data,
                raft::device_matrix_view<int, idx_t, raft::col_major> bins,
@@ -108,7 +109,7 @@ void histogram(raft::device_resources const& handle,
                                                data.data_handle(),
                                                data.extent(0),
                                                data.extent(1),
-                                               handle.get_stream(),
+                                               resource::get_cuda_stream(handle),
                                                binner);
 }
 
diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh
index ca6c1ddf8e..ce7872d55d 100644
--- a/cpp/include/raft/stats/homogeneity_score.cuh
+++ b/cpp/include/raft/stats/homogeneity_score.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/homogeneity_score.cuh>
 
 namespace raft {
@@ -68,7 +69,7 @@ double homogeneity_score(const T* truthClusterArray,
  * @return the homogeneity score
  */
 template <typename value_t, typename idx_t>
-double homogeneity_score(raft::device_resources const& handle,
+double homogeneity_score(raft::resources const& handle,
                          raft::device_vector_view<const value_t, idx_t> truth_cluster_array,
                          raft::device_vector_view<const value_t, idx_t> pred_cluster_array,
                          value_t lower_label_range,
@@ -82,7 +83,7 @@ double homogeneity_score(raft::device_resources const& handle,
                                    truth_cluster_array.extent(0),
                                    lower_label_range,
                                    upper_label_range,
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_homogeneity_score
diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh
index 2d865a566d..fc61a01605 100644
--- a/cpp/include/raft/stats/information_criterion.cuh
+++ b/cpp/include/raft/stats/information_criterion.cuh
@@ -30,7 +30,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/batched/information_criterion.cuh>
 #include <raft/stats/stats_types.hpp>
 
@@ -91,7 +92,7 @@ void information_criterion_batched(ScalarT* d_ic,
  * @param[in]  n_samples        Number of samples in each series
  */
 template <typename value_t, typename idx_t>
-void information_criterion_batched(raft::device_resources const& handle,
+void information_criterion_batched(raft::resources const& handle,
                                    raft::device_vector_view<const value_t, idx_t> d_loglikelihood,
                                    raft::device_vector_view<value_t, idx_t> d_ic,
                                    IC_Type ic_type,
@@ -107,7 +108,7 @@ void information_criterion_batched(raft::device_resources const& handle,
                                          n_params,
                                          d_ic.extent(0),
                                          n_samples,
-                                         handle.get_stream());
+                                         resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_information_criterion
diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh
index f19cc0d90d..2e01918d2a 100644
--- a/cpp/include/raft/stats/kl_divergence.cuh
+++ b/cpp/include/raft/stats/kl_divergence.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/kl_divergence.cuh>
 
 namespace raft {
@@ -60,15 +61,17 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size,
  * @return the KL Divergence value
  */
 template <typename value_t, typename idx_t>
-value_t kl_divergence(raft::device_resources const& handle,
+value_t kl_divergence(raft::resources const& handle,
                       raft::device_vector_view<const value_t, idx_t> modelPDF,
                       raft::device_vector_view<const value_t, idx_t> candidatePDF)
 {
   RAFT_EXPECTS(modelPDF.size() == candidatePDF.size(), "Size mismatch");
   RAFT_EXPECTS(modelPDF.is_exhaustive(), "modelPDF must be contiguous");
   RAFT_EXPECTS(candidatePDF.is_exhaustive(), "candidatePDF must be contiguous");
-  return detail::kl_divergence(
-    modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream());
+  return detail::kl_divergence(modelPDF.data_handle(),
+                               candidatePDF.data_handle(),
+                               modelPDF.extent(0),
+                               resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group kl_divergence
diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh
index 303700e80d..96c9ca3b5c 100644
--- a/cpp/include/raft/stats/mean.cuh
+++ b/cpp/include/raft/stats/mean.cuh
@@ -20,7 +20,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/mean.cuh>
 
 namespace raft {
@@ -70,7 +71,7 @@ void mean(
  *   to normalize the output using N-1 or N, for true or false, respectively
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void mean(raft::device_resources const& handle,
+void mean(raft::resources const& handle,
           raft::device_matrix_view<const value_t, idx_t, layout_t> data,
           raft::device_vector_view<value_t, idx_t> mu,
           bool sample)
@@ -87,7 +88,7 @@ void mean(raft::device_resources const& handle,
                data.extent(0),
                sample,
                std::is_same_v<layout_t, raft::row_major>,
-               handle.get_stream());
+               resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_mean
diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh
index 2f1deb7467..48f5eb667f 100644
--- a/cpp/include/raft/stats/mean_center.cuh
+++ b/cpp/include/raft/stats/mean_center.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/mean_center.cuh>
 
 namespace raft {
@@ -96,7 +97,7 @@ void meanAdd(Type* out,
  * @param[in]  bcast_along_rows whether to broadcast vector along rows or columns
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void mean_center(raft::device_resources const& handle,
+void mean_center(raft::resources const& handle,
                  raft::device_matrix_view<const value_t, idx_t, layout_t> data,
                  raft::device_vector_view<const value_t, idx_t> mu,
                  raft::device_matrix_view<value_t, idx_t, layout_t> out,
@@ -117,7 +118,7 @@ void mean_center(raft::device_resources const& handle,
                                      data.extent(0),
                                      std::is_same_v<layout_t, raft::row_major>,
                                      bcast_along_rows,
-                                     handle.get_stream());
+                                     resource::get_cuda_stream(handle));
 }
 
 /**
@@ -133,7 +134,7 @@ void mean_center(raft::device_resources const& handle,
  * @param[in]  bcast_along_rows whether to broadcast vector along rows or columns
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void mean_add(raft::device_resources const& handle,
+void mean_add(raft::resources const& handle,
               raft::device_matrix_view<const value_t, idx_t, layout_t> data,
               raft::device_vector_view<const value_t, idx_t> mu,
               raft::device_matrix_view<value_t, idx_t, layout_t> out,
@@ -154,7 +155,7 @@ void mean_add(raft::device_resources const& handle,
                                   data.extent(0),
                                   std::is_same_v<layout_t, raft::row_major>,
                                   bcast_along_rows,
-                                  handle.get_stream());
+                                  resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_mean_center
diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh
index bad85f4260..f6127df701 100644
--- a/cpp/include/raft/stats/meanvar.cuh
+++ b/cpp/include/raft/stats/meanvar.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/meanvar.cuh>
 
 namespace raft::stats {
@@ -80,7 +81,7 @@ void meanvar(Type* mean,
  * normalize the variance using N-1 or N, for true or false respectively.
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void meanvar(raft::device_resources const& handle,
+void meanvar(raft::resources const& handle,
              raft::device_matrix_view<const value_t, idx_t, layout_t> data,
              raft::device_vector_view<value_t, idx_t> mean,
              raft::device_vector_view<value_t, idx_t> var,
@@ -101,7 +102,7 @@ void meanvar(raft::device_resources const& handle,
                   data.extent(0),
                   sample,
                   std::is_same_v<layout_t, raft::row_major>,
-                  handle.get_stream());
+                  resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_mean_var
diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh
index 10f1ea163b..0c5a62257d 100644
--- a/cpp/include/raft/stats/minmax.cuh
+++ b/cpp/include/raft/stats/minmax.cuh
@@ -20,6 +20,7 @@
 
 #include <optional>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/minmax.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -97,7 +98,7 @@ void minmax(const T* data,
  *    in shared memory
  */
 template <typename value_t, typename idx_t>
-void minmax(raft::device_resources const& handle,
+void minmax(raft::resources const& handle,
             raft::device_matrix_view<const value_t, idx_t, raft::col_major> data,
             std::optional<raft::device_vector_view<const unsigned, idx_t>> rowids,
             std::optional<raft::device_vector_view<const unsigned, idx_t>> colids,
@@ -133,7 +134,7 @@ void minmax(raft::device_resources const& handle,
                           globalmin.data_handle(),
                           globalmax.data_handle(),
                           sampledcols_ptr,
-                          handle.get_stream());
+                          resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_minmax
diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh
index be30bcd7fc..5c4ae43e09 100644
--- a/cpp/include/raft/stats/mutual_info_score.cuh
+++ b/cpp/include/raft/stats/mutual_info_score.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/mutual_info_score.cuh>
 
 namespace raft {
@@ -65,7 +66,7 @@ double mutual_info_score(const T* firstClusterArray,
  * @return the mutual information score
  */
 template <typename value_t, typename idx_t>
-double mutual_info_score(raft::device_resources const& handle,
+double mutual_info_score(raft::resources const& handle,
                          raft::device_vector_view<const value_t, idx_t> first_cluster_array,
                          raft::device_vector_view<const value_t, idx_t> second_cluster_array,
                          value_t lower_label_range,
@@ -80,7 +81,7 @@ double mutual_info_score(raft::device_resources const& handle,
                                    first_cluster_array.extent(0),
                                    lower_label_range,
                                    upper_label_range,
-                                   handle.get_stream());
+                                   resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_mutual_info
diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh
index 1048deb7f3..c98b4bc93a 100644
--- a/cpp/include/raft/stats/r2_score.cuh
+++ b/cpp/include/raft/stats/r2_score.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/scores.cuh>
 
 namespace raft {
@@ -69,7 +70,7 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream)
  * @note The constness of y and y_hat is currently casted away.
  */
 template <typename value_t, typename idx_t>
-value_t r2_score(raft::device_resources const& handle,
+value_t r2_score(raft::resources const& handle,
                  raft::device_vector_view<const value_t, idx_t> y,
                  raft::device_vector_view<const value_t, idx_t> y_hat)
 {
@@ -81,7 +82,7 @@ value_t r2_score(raft::device_resources const& handle,
   return detail::r2_score(const_cast<value_t*>(y.data_handle()),
                           const_cast<value_t*>(y_hat.data_handle()),
                           y.extent(0),
-                          handle.get_stream());
+                          resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_r2_score
diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh
index f0b37592e4..6a208c5492 100644
--- a/cpp/include/raft/stats/rand_index.cuh
+++ b/cpp/include/raft/stats/rand_index.cuh
@@ -19,7 +19,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/rand_index.cuh>
 
 namespace raft {
@@ -55,7 +56,7 @@ double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cu
  * @return: The RandIndex value.
  */
 template <typename value_t, typename idx_t>
-double rand_index(raft::device_resources const& handle,
+double rand_index(raft::resources const& handle,
                   raft::device_vector_view<const value_t, idx_t> first_cluster_array,
                   raft::device_vector_view<const value_t, idx_t> second_cluster_array)
 {
@@ -66,7 +67,7 @@ double rand_index(raft::device_resources const& handle,
   return detail::compute_rand_index(first_cluster_array.data_handle(),
                                     second_cluster_array.data_handle(),
                                     second_cluster_array.extent(0),
-                                    handle.get_stream());
+                                    resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_rand_index
diff --git a/cpp/include/raft/stats/regression_metrics.cuh b/cpp/include/raft/stats/regression_metrics.cuh
index 7c3ca7386b..718170f716 100644
--- a/cpp/include/raft/stats/regression_metrics.cuh
+++ b/cpp/include/raft/stats/regression_metrics.cuh
@@ -19,8 +19,9 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/scores.cuh>
 
 namespace raft {
@@ -73,7 +74,7 @@ void regression_metrics(const T* predictions,
  * ref_predictions[i]| for i in [0, n).
  */
 template <typename value_t, typename idx_t>
-void regression_metrics(raft::device_resources const& handle,
+void regression_metrics(raft::resources const& handle,
                         raft::device_vector_view<const value_t, idx_t> predictions,
                         raft::device_vector_view<const value_t, idx_t> ref_predictions,
                         raft::host_scalar_view<double> mean_abs_error,
@@ -92,7 +93,7 @@ void regression_metrics(raft::device_resources const& handle,
   detail::regression_metrics(predictions.data_handle(),
                              ref_predictions.data_handle(),
                              predictions.extent(0),
-                             handle.get_stream(),
+                             resource::get_cuda_stream(handle),
                              *mean_abs_error.data_handle(),
                              *mean_squared_error.data_handle(),
                              *median_abs_error.data_handle());
diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh
index db9db1f99a..23eef84604 100644
--- a/cpp/include/raft/stats/silhouette_score.cuh
+++ b/cpp/include/raft/stats/silhouette_score.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/batched/silhouette_score.cuh>
 #include <raft/stats/detail/silhouette_score.cuh>
 
@@ -44,7 +45,7 @@ namespace stats {
  */
 template <typename DataT, typename LabelT>
 DataT silhouette_score(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   DataT* X_in,
   int nRows,
   int nCols,
@@ -60,7 +61,7 @@ DataT silhouette_score(
 
 template <typename value_t, typename value_idx, typename label_idx>
 value_t silhouette_score_batched(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   value_t* X,
   value_idx n_rows,
   value_idx n_cols,
@@ -98,7 +99,7 @@ value_t silhouette_score_batched(
  */
 template <typename value_t, typename label_t, typename idx_t>
 value_t silhouette_score(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> X_in,
   raft::device_vector_view<const label_t, idx_t> labels,
   std::optional<raft::device_vector_view<value_t, idx_t>> silhouette_score_per_sample,
@@ -120,7 +121,7 @@ value_t silhouette_score(
                                   labels.data_handle(),
                                   n_unique_labels,
                                   silhouette_score_per_sample_ptr,
-                                  handle.get_stream(),
+                                  resource::get_cuda_stream(handle),
                                   metric);
 }
 
@@ -144,7 +145,7 @@ value_t silhouette_score(
  */
 template <typename value_t, typename label_t, typename idx_t>
 value_t silhouette_score_batched(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> X,
   raft::device_vector_view<const label_t, idx_t> labels,
   std::optional<raft::device_vector_view<value_t, idx_t>> silhouette_score_per_sample,
@@ -187,7 +188,7 @@ value_t silhouette_score_batched(
  */
 template <typename value_t, typename label_t, typename idx_t>
 value_t silhouette_score(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> X_in,
   raft::device_vector_view<const label_t, idx_t> labels,
   std::nullopt_t silhouette_score_per_sample,
@@ -207,7 +208,7 @@ value_t silhouette_score(
  */
 template <typename value_t, typename label_t, typename idx_t>
 value_t silhouette_score_batched(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> X,
   raft::device_vector_view<const label_t, idx_t> labels,
   std::nullopt_t silhouette_score_per_sample,
diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh
index 7b0cc6cbe0..6349f8fd11 100644
--- a/cpp/include/raft/stats/stddev.cuh
+++ b/cpp/include/raft/stats/stddev.cuh
@@ -19,7 +19,8 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/stddev.cuh>
 
 namespace raft {
@@ -109,7 +110,7 @@ void vars(Type* var,
  *  to normalize the output using N-1 or N, for true or false, respectively
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void stddev(raft::device_resources const& handle,
+void stddev(raft::resources const& handle,
             raft::device_matrix_view<const value_t, idx_t, layout_t> data,
             raft::device_vector_view<const value_t, idx_t> mu,
             raft::device_vector_view<value_t, idx_t> std,
@@ -129,7 +130,7 @@ void stddev(raft::device_resources const& handle,
                  data.extent(0),
                  sample,
                  is_row_major,
-                 handle.get_stream());
+                 resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_stddev
@@ -156,7 +157,7 @@ void stddev(raft::device_resources const& handle,
  *  to normalize the output using N-1 or N, for true or false, respectively
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void vars(raft::device_resources const& handle,
+void vars(raft::resources const& handle,
           raft::device_matrix_view<const value_t, idx_t, layout_t> data,
           raft::device_vector_view<const value_t, idx_t> mu,
           raft::device_vector_view<value_t, idx_t> var,
@@ -176,7 +177,7 @@ void vars(raft::device_resources const& handle,
                data.extent(0),
                sample,
                is_row_major,
-               handle.get_stream());
+               resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_variance
diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh
index d033dc8892..2ac9cd9eb5 100644
--- a/cpp/include/raft/stats/sum.cuh
+++ b/cpp/include/raft/stats/sum.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/sum.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -64,7 +65,7 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c
  * @param[out] output the output mean vector
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void sum(raft::device_resources const& handle,
+void sum(raft::resources const& handle,
          raft::device_matrix_view<const value_t, idx_t, layout_t> input,
          raft::device_vector_view<value_t, idx_t> output)
 {
@@ -79,7 +80,7 @@ void sum(raft::device_resources const& handle,
               input.extent(1),
               input.extent(0),
               is_row_major,
-              handle.get_stream());
+              resource::get_cuda_stream(handle));
 }
 
 /** @} */  // end group stats_sum
diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh
index a79cda8dfc..3f4464f4d3 100644
--- a/cpp/include/raft/stats/trustworthiness_score.cuh
+++ b/cpp/include/raft/stats/trustworthiness_score.cuh
@@ -19,7 +19,7 @@
 
 #pragma once
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/trustworthiness_score.cuh>
 
 namespace raft {
@@ -38,7 +38,7 @@ namespace stats {
  * @return[out] Trustworthiness score
  */
 template <typename math_t, raft::distance::DistanceType distance_type>
-double trustworthiness_score(const raft::device_resources& h,
+double trustworthiness_score(const raft::resources& h,
                              const math_t* X,
                              math_t* X_embedded,
                              int n,
@@ -71,7 +71,7 @@ double trustworthiness_score(const raft::device_resources& h,
  */
 template <raft::distance::DistanceType distance_type, typename value_t, typename idx_t>
 double trustworthiness_score(
-  raft::device_resources const& handle,
+  raft::resources const& handle,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> X,
   raft::device_matrix_view<const value_t, idx_t, raft::row_major> X_embedded,
   int n_neighbors,
diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh
index 948dd0a6ef..8ea5c65600 100644
--- a/cpp/include/raft/stats/v_measure.cuh
+++ b/cpp/include/raft/stats/v_measure.cuh
@@ -19,7 +19,8 @@
 
 #pragma once
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/detail/v_measure.cuh>
 
 namespace raft {
@@ -68,7 +69,7 @@ double v_measure(const T* truthClusterArray,
  * @return the v-measure between the two clusters
  */
 template <typename value_t, typename idx_t>
-double v_measure(raft::device_resources const& handle,
+double v_measure(raft::resources const& handle,
                  raft::device_vector_view<const value_t, idx_t> truth_cluster_array,
                  raft::device_vector_view<const value_t, idx_t> pred_cluster_array,
                  value_t lower_label_range,
@@ -85,7 +86,7 @@ double v_measure(raft::device_resources const& handle,
                            truth_cluster_array.extent(0),
                            lower_label_range,
                            upper_label_range,
-                           handle.get_stream(),
+                           resource::get_cuda_stream(handle),
                            beta);
 }
 
diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh
index 273adf1641..7d06d5dff1 100644
--- a/cpp/include/raft/stats/weighted_mean.cuh
+++ b/cpp/include/raft/stats/weighted_mean.cuh
@@ -20,6 +20,7 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/detail/weighted_mean.cuh>
 
 namespace raft {
@@ -112,7 +113,7 @@ void colWeightedMean(
  * @param[in]  along_rows whether to reduce along rows or columns
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void weighted_mean(raft::device_resources const& handle,
+void weighted_mean(raft::resources const& handle,
                    raft::device_matrix_view<const value_t, idx_t, layout_t> data,
                    raft::device_vector_view<const value_t, idx_t> weights,
                    raft::device_vector_view<value_t, idx_t> mu,
@@ -138,7 +139,7 @@ void weighted_mean(raft::device_resources const& handle,
                        data.extent(0),
                        is_row_major,
                        along_rows,
-                       handle.get_stream());
+                       resource::get_cuda_stream(handle));
 }
 
 /**
@@ -154,7 +155,7 @@ void weighted_mean(raft::device_resources const& handle,
  * @param[out] mu the output mean vector of size nrows
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void row_weighted_mean(raft::device_resources const& handle,
+void row_weighted_mean(raft::resources const& handle,
                        raft::device_matrix_view<const value_t, idx_t, layout_t> data,
                        raft::device_vector_view<const value_t, idx_t> weights,
                        raft::device_vector_view<value_t, idx_t> mu)
@@ -175,7 +176,7 @@ void row_weighted_mean(raft::device_resources const& handle,
  * @param[out] mu the output mean vector of size ncols
  */
 template <typename value_t, typename idx_t, typename layout_t>
-void col_weighted_mean(raft::device_resources const& handle,
+void col_weighted_mean(raft::resources const& handle,
                        raft::device_matrix_view<const value_t, idx_t, layout_t> data,
                        raft::device_vector_view<const value_t, idx_t> weights,
                        raft::device_vector_view<value_t, idx_t> mu)
diff --git a/cpp/include/raft/util/cache.cuh b/cpp/include/raft/util/cache.cuh
index 11b1edee73..64b180de2a 100644
--- a/cpp/include/raft/util/cache.cuh
+++ b/cpp/include/raft/util/cache.cuh
@@ -67,7 +67,7 @@ namespace raft::cache {
  * // We assume that our ML algo repeatedly calls calc, and the set of keys have
  * // an overlap. We will use the cache to avoid repeated calculations.
  *
- * // Assume we have raft::device_resources& h, and cudaStream_t stream
+ * // Assume we have raft::resources& h, and cudaStream_t stream
  * Cache<float> cache(h.get_device_allocator(), stream, m);
  *
  * // A buffer that we will reuse to store the cache indices.
diff --git a/cpp/include/raft_runtime/cluster/kmeans.hpp b/cpp/include/raft_runtime/cluster/kmeans.hpp
index aab8c14eab..d47faf1510 100644
--- a/cpp/include/raft_runtime/cluster/kmeans.hpp
+++ b/cpp/include/raft_runtime/cluster/kmeans.hpp
@@ -15,8 +15,8 @@
  */
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
 #include <raft/cluster/kmeans_types.hpp>
@@ -28,7 +28,7 @@ namespace raft::runtime::cluster::kmeans {
  * @{
  */
 
-void update_centroids(raft::device_resources const& handle,
+void update_centroids(raft::resources const& handle,
                       const float* X,
                       int n_samples,
                       int n_features,
@@ -39,7 +39,7 @@ void update_centroids(raft::device_resources const& handle,
                       float* new_centroids,
                       float* weight_per_cluster);
 
-void update_centroids(raft::device_resources const& handle,
+void update_centroids(raft::resources const& handle,
                       const double* X,
                       int n_samples,
                       int n_features,
@@ -50,7 +50,7 @@ void update_centroids(raft::device_resources const& handle,
                       double* new_centroids,
                       double* weight_per_cluster);
 
-void fit(raft::device_resources const& handle,
+void fit(raft::resources const& handle,
          const raft::cluster::kmeans::KMeansParams& params,
          raft::device_matrix_view<const float, int, row_major> X,
          std::optional<raft::device_vector_view<const float, int>> sample_weight,
@@ -58,7 +58,7 @@ void fit(raft::device_resources const& handle,
          raft::host_scalar_view<float, int> inertia,
          raft::host_scalar_view<int, int> n_iter);
 
-void fit(raft::device_resources const& handle,
+void fit(raft::resources const& handle,
          const raft::cluster::kmeans::KMeansParams& params,
          raft::device_matrix_view<const double, int, row_major> X,
          std::optional<raft::device_vector_view<const double, int>> sample_weight,
@@ -66,17 +66,17 @@ void fit(raft::device_resources const& handle,
          raft::host_scalar_view<double, int> inertia,
          raft::host_scalar_view<int, int> n_iter);
 
-void init_plus_plus(raft::device_resources const& handle,
+void init_plus_plus(raft::resources const& handle,
                     const raft::cluster::kmeans::KMeansParams& params,
                     raft::device_matrix_view<const float, int, row_major> X,
                     raft::device_matrix_view<float, int, row_major> centroids);
 
-void init_plus_plus(raft::device_resources const& handle,
+void init_plus_plus(raft::resources const& handle,
                     const raft::cluster::kmeans::KMeansParams& params,
                     raft::device_matrix_view<const double, int, row_major> X,
                     raft::device_matrix_view<double, int, row_major> centroids);
 
-void cluster_cost(raft::device_resources const& handle,
+void cluster_cost(raft::resources const& handle,
                   const float* X,
                   int n_samples,
                   int n_features,
@@ -84,7 +84,7 @@ void cluster_cost(raft::device_resources const& handle,
                   const float* centroids,
                   float* cost);
 
-void cluster_cost(raft::device_resources const& handle,
+void cluster_cost(raft::resources const& handle,
                   const double* X,
                   int n_samples,
                   int n_features,
diff --git a/cpp/include/raft_runtime/distance/fused_l2_nn.hpp b/cpp/include/raft_runtime/distance/fused_l2_nn.hpp
index bdac3723e2..6154e03f4c 100644
--- a/cpp/include/raft_runtime/distance/fused_l2_nn.hpp
+++ b/cpp/include/raft_runtime/distance/fused_l2_nn.hpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
 namespace raft::runtime::distance {
@@ -42,7 +42,7 @@ namespace raft::runtime::distance {
  * @param[in]  k             gemm k
  * @param[in]  sqrt          Whether the output `minDist` should contain L2-sqrt
  */
-void fused_l2_nn_min_arg(raft::device_resources const& handle,
+void fused_l2_nn_min_arg(raft::resources const& handle,
                          int* min,
                          const float* x,
                          const float* y,
@@ -51,7 +51,7 @@ void fused_l2_nn_min_arg(raft::device_resources const& handle,
                          int k,
                          bool sqrt);
 
-void fused_l2_nn_min_arg(raft::device_resources const& handle,
+void fused_l2_nn_min_arg(raft::resources const& handle,
                          int* min,
                          const double* x,
                          const double* y,
diff --git a/cpp/include/raft_runtime/distance/pairwise_distance.hpp b/cpp/include/raft_runtime/distance/pairwise_distance.hpp
index 751f821ffb..fa45006a60 100644
--- a/cpp/include/raft_runtime/distance/pairwise_distance.hpp
+++ b/cpp/include/raft_runtime/distance/pairwise_distance.hpp
@@ -23,7 +23,7 @@ namespace raft::runtime::distance {
  * @{
  */
 
-void pairwise_distance(raft::device_resources const& handle,
+void pairwise_distance(raft::resources const& handle,
                        float* x,
                        float* y,
                        float* dists,
@@ -34,7 +34,7 @@ void pairwise_distance(raft::device_resources const& handle,
                        bool isRowMajor,
                        float metric_arg);
 
-void pairwise_distance(raft::device_resources const& handle,
+void pairwise_distance(raft::resources const& handle,
                        double* x,
                        double* y,
                        double* dists,
diff --git a/cpp/include/raft_runtime/matrix/select_k.hpp b/cpp/include/raft_runtime/matrix/select_k.hpp
index 08c0e01d0a..dcd40aac3c 100644
--- a/cpp/include/raft_runtime/matrix/select_k.hpp
+++ b/cpp/include/raft_runtime/matrix/select_k.hpp
@@ -17,12 +17,12 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 #include <optional>
 
 namespace raft::runtime::matrix {
-void select_k(const device_resources& handle,
+void select_k(const resources& handle,
               raft::device_matrix_view<const float, int64_t, row_major> in_val,
               std::optional<raft::device_matrix_view<const int64_t, int64_t, row_major>> in_idx,
               raft::device_matrix_view<float, int64_t, row_major> out_val,
diff --git a/cpp/include/raft_runtime/neighbors/brute_force.hpp b/cpp/include/raft_runtime/neighbors/brute_force.hpp
index 12da6ff101..3bc19b2a2c 100644
--- a/cpp/include/raft_runtime/neighbors/brute_force.hpp
+++ b/cpp/include/raft_runtime/neighbors/brute_force.hpp
@@ -17,12 +17,12 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::runtime::neighbors::brute_force {
 
 #define RAFT_INST_BFKNN(IDX_T, DATA_T, MATRIX_IDX_T, INDEX_LAYOUT, SEARCH_LAYOUT)        \
-  void knn(raft::device_resources const& handle,                                         \
+  void knn(raft::resources const& handle,                                                \
            raft::device_matrix_view<const DATA_T, MATRIX_IDX_T, INDEX_LAYOUT> index,     \
            raft::device_matrix_view<const DATA_T, MATRIX_IDX_T, SEARCH_LAYOUT> search,   \
            raft::device_matrix_view<IDX_T, MATRIX_IDX_T, row_major> indices,             \
diff --git a/cpp/include/raft_runtime/neighbors/ivf_flat.hpp b/cpp/include/raft_runtime/neighbors/ivf_flat.hpp
index 18ea064015..37a9d39ae3 100644
--- a/cpp/include/raft_runtime/neighbors/ivf_flat.hpp
+++ b/cpp/include/raft_runtime/neighbors/ivf_flat.hpp
@@ -24,23 +24,23 @@ namespace raft::runtime::neighbors::ivf_flat {
 // wrappers, where exception handling is not compatible with return type that has nontrivial
 // constructor.
 #define RAFT_INST_BUILD_EXTEND(T, IdxT)                                              \
-  auto build(raft::device_resources const& handle,                                   \
+  auto build(raft::resources const& handle,                                          \
              const raft::neighbors::ivf_flat::index_params& params,                  \
              raft::device_matrix_view<const T, IdxT, row_major> dataset)             \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                                     \
                                                                                      \
-  auto extend(raft::device_resources const& handle,                                  \
+  auto extend(raft::resources const& handle,                                         \
               raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
               std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
               const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                                     \
                                                                                      \
-  void build(raft::device_resources const& handle,                                   \
+  void build(raft::resources const& handle,                                          \
              const raft::neighbors::ivf_flat::index_params& params,                  \
              raft::device_matrix_view<const T, IdxT, row_major> dataset,             \
              raft::neighbors::ivf_flat::index<T, IdxT>& idx);                        \
                                                                                      \
-  void extend(raft::device_resources const& handle,                                  \
+  void extend(raft::resources const& handle,                                         \
               raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
               std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
               raft::neighbors::ivf_flat::index<T, IdxT>* idx);
@@ -52,7 +52,7 @@ RAFT_INST_BUILD_EXTEND(uint8_t, int64_t)
 #undef RAFT_INST_BUILD_EXTEND
 
 #define RAFT_INST_SEARCH(T, IdxT)                                 \
-  void search(raft::device_resources const&,                      \
+  void search(raft::resources const&,                             \
               raft::neighbors::ivf_flat::search_params const&,    \
               raft::neighbors::ivf_flat::index<T, IdxT> const&,   \
               raft::device_matrix_view<const T, IdxT, row_major>, \
diff --git a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp
index 17260b0ded..19cdbdf220 100644
--- a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp
+++ b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp
@@ -25,22 +25,22 @@ namespace raft::runtime::neighbors::ivf_pq {
 // constructor.
 #define RAFT_DECL_BUILD_EXTEND(T, IdxT)                                              \
   [[nodiscard]] raft::neighbors::ivf_pq::index<IdxT> build(                          \
-    raft::device_resources const& handle,                                            \
+    raft::resources const& handle,                                                   \
     const raft::neighbors::ivf_pq::index_params& params,                             \
     raft::device_matrix_view<const T, IdxT, row_major> dataset);                     \
                                                                                      \
-  void build(raft::device_resources const& handle,                                   \
+  void build(raft::resources const& handle,                                          \
              const raft::neighbors::ivf_pq::index_params& params,                    \
              raft::device_matrix_view<const T, IdxT, row_major> dataset,             \
              raft::neighbors::ivf_pq::index<IdxT>* idx);                             \
                                                                                      \
   [[nodiscard]] raft::neighbors::ivf_pq::index<IdxT> extend(                         \
-    raft::device_resources const& handle,                                            \
+    raft::resources const& handle,                                                   \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                  \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,           \
     const raft::neighbors::ivf_pq::index<IdxT>& idx);                                \
                                                                                      \
-  void extend(raft::device_resources const& handle,                                  \
+  void extend(raft::resources const& handle,                                         \
               raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
               std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
               raft::neighbors::ivf_pq::index<IdxT>* idx);
@@ -52,7 +52,7 @@ RAFT_DECL_BUILD_EXTEND(uint8_t, int64_t);
 #undef RAFT_DECL_BUILD_EXTEND
 
 #define RAFT_DECL_SEARCH(T, IdxT)                                         \
-  void search(raft::device_resources const& handle,                       \
+  void search(raft::resources const& handle,                              \
               const raft::neighbors::ivf_pq::search_params& params,       \
               const raft::neighbors::ivf_pq::index<IdxT>& idx,            \
               raft::device_matrix_view<const T, IdxT, row_major> queries, \
@@ -75,7 +75,7 @@ RAFT_DECL_SEARCH(uint8_t, int64_t);
  * @param[in] index IVF-PQ index
  *
  */
-void serialize(raft::device_resources const& handle,
+void serialize(raft::resources const& handle,
                const std::string& filename,
                const raft::neighbors::ivf_pq::index<int64_t>& index);
 
@@ -89,7 +89,7 @@ void serialize(raft::device_resources const& handle,
  * @param[in] index IVF-PQ index
  *
  */
-void deserialize(raft::device_resources const& handle,
+void deserialize(raft::resources const& handle,
                  const std::string& filename,
                  raft::neighbors::ivf_pq::index<int64_t>* index);
 
diff --git a/cpp/include/raft_runtime/neighbors/refine.hpp b/cpp/include/raft_runtime/neighbors/refine.hpp
index 2c162c2faa..fba7d0fc0e 100644
--- a/cpp/include/raft_runtime/neighbors/refine.hpp
+++ b/cpp/include/raft_runtime/neighbors/refine.hpp
@@ -17,13 +17,13 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 // #include <raft/core/host_mdspan.hpp>
 
 namespace raft::runtime::neighbors {
 
 #define RAFT_INST_REFINE(IDX_T, DATA_T)                                                      \
-  void refine(raft::device_resources const& handle,                                          \
+  void refine(raft::resources const& handle,                                                 \
               raft::device_matrix_view<const DATA_T, int64_t, row_major> dataset,            \
               raft::device_matrix_view<const DATA_T, int64_t, row_major> queries,            \
               raft::device_matrix_view<const IDX_T, int64_t, row_major> neighbor_candidates, \
@@ -31,7 +31,7 @@ namespace raft::runtime::neighbors {
               raft::device_matrix_view<float, int64_t, row_major> distances,                 \
               distance::DistanceType metric);                                                \
                                                                                              \
-  void refine(raft::device_resources const& handle,                                          \
+  void refine(raft::resources const& handle,                                                 \
               raft::host_matrix_view<const DATA_T, int64_t, row_major> dataset,              \
               raft::host_matrix_view<const DATA_T, int64_t, row_major> queries,              \
               raft::host_matrix_view<const IDX_T, int64_t, row_major> neighbor_candidates,   \
diff --git a/cpp/include/raft_runtime/random/rmat_rectangular_generator.hpp b/cpp/include/raft_runtime/random/rmat_rectangular_generator.hpp
index 8f18fd1388..bf5259aeba 100644
--- a/cpp/include/raft_runtime/random/rmat_rectangular_generator.hpp
+++ b/cpp/include/raft_runtime/random/rmat_rectangular_generator.hpp
@@ -16,7 +16,7 @@
 
 #include <cstdint>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng_state.hpp>
 
 namespace raft::runtime::random {
@@ -26,15 +26,15 @@ namespace raft::runtime::random {
  * @{
  */
 
-#define FUNC_DECL(IdxT, ProbT)                                    \
-  void rmat_rectangular_gen(raft::device_resources const& handle, \
-                            IdxT* out,                            \
-                            IdxT* out_src,                        \
-                            IdxT* out_dst,                        \
-                            const ProbT* theta,                   \
-                            IdxT r_scale,                         \
-                            IdxT c_scale,                         \
-                            IdxT n_edges,                         \
+#define FUNC_DECL(IdxT, ProbT)                             \
+  void rmat_rectangular_gen(raft::resources const& handle, \
+                            IdxT* out,                     \
+                            IdxT* out_src,                 \
+                            IdxT* out_dst,                 \
+                            const ProbT* theta,            \
+                            IdxT r_scale,                  \
+                            IdxT c_scale,                  \
+                            IdxT n_edges,                  \
                             raft::random::RngState& r)
 
 FUNC_DECL(int, float);
diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh
index 1af3859ce7..ac9a0bb717 100644
--- a/cpp/internal/raft_internal/matrix/select_k.cuh
+++ b/cpp/internal/raft_internal/matrix/select_k.cuh
@@ -16,7 +16,8 @@
 
 #pragma once
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/detail/select_radix.cuh>
 #include <raft/matrix/detail/select_warpsort.cuh>
 #include <raft/matrix/select_k.cuh>
@@ -75,7 +76,7 @@ inline auto operator<<(std::ostream& os, const Algo& algo) -> std::ostream&
 }
 
 template <typename T, typename IdxT>
-void select_k_impl(const device_resources& handle,
+void select_k_impl(const resources& handle,
                    const Algo& algo,
                    const T* in,
                    const IdxT* in_idx,
@@ -86,7 +87,7 @@ void select_k_impl(const device_resources& handle,
                    IdxT* out_idx,
                    bool select_min)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   switch (algo) {
     case Algo::kPublicApi: {
       auto in_extent  = make_extents<int64_t>(batch_size, len);
diff --git a/cpp/internal/raft_internal/neighbors/refine_helper.cuh b/cpp/internal/raft_internal/neighbors/refine_helper.cuh
index 4546ce479a..67217d1e0e 100644
--- a/cpp/internal/raft_internal/neighbors/refine_helper.cuh
+++ b/cpp/internal/raft_internal/neighbors/refine_helper.cuh
@@ -15,13 +15,14 @@
  */
 #pragma once
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft_internal/neighbors/naive_knn.cuh>
 
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/host_mdspan.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/random/rng.cuh>
 
@@ -45,9 +46,9 @@ struct RefineInputs {
 template <typename DataT, typename DistanceT, typename IdxT>
 class RefineHelper {
  public:
-  RefineHelper(const raft::device_resources& handle, RefineInputs<IdxT> params)
+  RefineHelper(const raft::resources& handle, RefineInputs<IdxT> params)
     : handle_(handle),
-      stream_(handle.get_stream()),
+      stream_(resource::get_cuda_stream(handle)),
       p(params),
       dataset(handle),
       queries(handle),
@@ -89,7 +90,7 @@ class RefineHelper {
                                         p.k0,
                                         p.metric,
                                         stream_);
-      handle_.sync_stream(stream_);
+      resource::sync_stream(handle_, stream_);
     }
 
     if (p.host_data) {
@@ -104,7 +105,7 @@ class RefineHelper {
 
       refined_distances_host = raft::make_host_matrix<DistanceT, IdxT>(p.n_queries, p.k);
       refined_indices_host   = raft::make_host_matrix<IdxT, IdxT>(p.n_queries, p.k);
-      handle_.sync_stream(stream_);
+      resource::sync_stream(handle_, stream_);
     }
 
     // Generate ground thruth for testing.
@@ -126,13 +127,13 @@ class RefineHelper {
       raft::copy(true_refined_indices_host.data(), indices_dev.data(), indices_dev.size(), stream_);
       raft::copy(
         true_refined_distances_host.data(), distances_dev.data(), distances_dev.size(), stream_);
-      handle_.sync_stream(stream_);
+      resource::sync_stream(handle_, stream_);
     }
   }
 
  public:
   RefineInputs<IdxT> p;
-  const raft::device_resources& handle_;
+  const raft::resources& handle_;
   rmm::cuda_stream_view stream_;
 
   raft::device_matrix<DataT, IdxT, row_major> dataset;
diff --git a/cpp/src/neighbors/ball_cover.cu b/cpp/src/neighbors/ball_cover.cu
index 4c49c1847b..3b129e168b 100644
--- a/cpp/src/neighbors/ball_cover.cu
+++ b/cpp/src/neighbors/ball_cover.cu
@@ -19,11 +19,11 @@
 
 #define instantiate_raft_neighbors_ball_cover(idx_t, value_t, int_t, matrix_idx_t)                 \
   template void raft::neighbors::ball_cover::build_index<idx_t, value_t, int_t, matrix_idx_t>(     \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index);      \
                                                                                                    \
   template void raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(   \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
     int_t k,                                                                                       \
     idx_t* inds,                                                                                   \
@@ -32,7 +32,7 @@
     float weight);                                                                                 \
                                                                                                    \
   template void raft::neighbors::ball_cover::all_knn_query<idx_t, value_t, int_t, matrix_idx_t>(   \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index,       \
     raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
     raft::device_matrix_view<value_t, matrix_idx_t, row_major> dists,                              \
@@ -41,7 +41,7 @@
     float weight);                                                                                 \
                                                                                                    \
   template void raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t>(                     \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t>& index,               \
     int_t k,                                                                                       \
     const value_t* query,                                                                          \
@@ -52,7 +52,7 @@
     float weight);                                                                                 \
                                                                                                    \
   template void raft::neighbors::ball_cover::knn_query<idx_t, value_t, int_t, matrix_idx_t>(       \
-    raft::device_resources const& handle,                                                          \
+    raft::resources const& handle,                                                                 \
     const raft::neighbors::ball_cover::BallCoverIndex<idx_t, value_t, int_t, matrix_idx_t>& index, \
     raft::device_matrix_view<const value_t, matrix_idx_t, row_major> query,                        \
     raft::device_matrix_view<idx_t, matrix_idx_t, row_major> inds,                                 \
diff --git a/cpp/src/neighbors/brute_force_00_generate.py b/cpp/src/neighbors/brute_force_00_generate.py
index 251dd53b1c..9adc5fef90 100644
--- a/cpp/src/neighbors/brute_force_00_generate.py
+++ b/cpp/src/neighbors/brute_force_00_generate.py
@@ -46,7 +46,7 @@
 knn_macro = """
 #define instantiate_raft_neighbors_brute_force_knn(idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op) \\
     template void raft::neighbors::brute_force::knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>( \\
-        raft::device_resources const& handle,                           \\
+        raft::resources const& handle,                           \\
         std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \\
         raft::device_matrix_view<const value_t, matrix_idx, search_layout> search, \\
         raft::device_matrix_view<idx_t, matrix_idx, row_major> indices, \\
@@ -61,7 +61,7 @@
 fused_l2_knn_macro = """
 #define instantiate_raft_neighbors_brute_force_fused_l2_knn(value_t, idx_t, idx_layout, query_layout) \\
     template void raft::neighbors::brute_force::fused_l2_knn(    \\
-        raft::device_resources const& handle,                           \\
+        raft::resources const& handle,                           \\
         raft::device_matrix_view<const value_t, idx_t, idx_layout> index, \\
         raft::device_matrix_view<const value_t, idx_t, query_layout> query, \\
         raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,     \\
diff --git a/cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu b/cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
index 4e1805f9a8..d482d24772 100644
--- a/cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
+++ b/cpp/src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
@@ -30,7 +30,7 @@
 #define instantiate_raft_neighbors_brute_force_fused_l2_knn(            \
   value_t, idx_t, idx_layout, query_layout)                             \
   template void raft::neighbors::brute_force::fused_l2_knn(             \
-    raft::device_resources const& handle,                               \
+    raft::resources const& handle,                                      \
     raft::device_matrix_view<const value_t, idx_t, idx_layout> index,   \
     raft::device_matrix_view<const value_t, idx_t, query_layout> query, \
     raft::device_matrix_view<idx_t, idx_t, row_major> out_inds,         \
diff --git a/cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu b/cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
index a668b076d6..a05b387dae 100644
--- a/cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
+++ b/cpp/src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
@@ -31,7 +31,7 @@
   idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
   template void raft::neighbors::brute_force::                                              \
     knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
-      raft::device_resources const& handle,                                                 \
+      raft::resources const& handle,                                                        \
       std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
       raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
       raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
diff --git a/cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu b/cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
index 21cac5034a..d1b9a4bb43 100644
--- a/cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
+++ b/cpp/src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
@@ -31,7 +31,7 @@
   idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
   template void raft::neighbors::brute_force::                                              \
     knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
-      raft::device_resources const& handle,                                                 \
+      raft::resources const& handle,                                                        \
       std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
       raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
       raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
diff --git a/cpp/src/neighbors/brute_force_knn_int_float_int.cu b/cpp/src/neighbors/brute_force_knn_int_float_int.cu
index b76fe09c2a..354f72e3f5 100644
--- a/cpp/src/neighbors/brute_force_knn_int_float_int.cu
+++ b/cpp/src/neighbors/brute_force_knn_int_float_int.cu
@@ -31,7 +31,7 @@
   idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
   template void raft::neighbors::brute_force::                                              \
     knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
-      raft::device_resources const& handle,                                                 \
+      raft::resources const& handle,                                                        \
       std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
       raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
       raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
diff --git a/cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu b/cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
index 4d3f627182..2f6bd9b888 100644
--- a/cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
+++ b/cpp/src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
@@ -31,7 +31,7 @@
   idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op)                     \
   template void raft::neighbors::brute_force::                                              \
     knn<idx_t, value_t, matrix_idx, index_layout, search_layout, epilogue_op>(              \
-      raft::device_resources const& handle,                                                 \
+      raft::resources const& handle,                                                        \
       std::vector<raft::device_matrix_view<const value_t, matrix_idx, index_layout>> index, \
       raft::device_matrix_view<const value_t, matrix_idx, search_layout> search,            \
       raft::device_matrix_view<idx_t, matrix_idx, row_major> indices,                       \
diff --git a/cpp/src/neighbors/detail/ivf_flat_search.cu b/cpp/src/neighbors/detail/ivf_flat_search.cu
index 345a8f499d..001281c8fc 100644
--- a/cpp/src/neighbors/detail/ivf_flat_search.cu
+++ b/cpp/src/neighbors/detail/ivf_flat_search.cu
@@ -18,7 +18,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_detail_search(T, IdxT)  \
   template void raft::neighbors::ivf_flat::detail::search<T, IdxT>( \
-    raft::device_resources const& handle,                           \
+    raft::resources const& handle,                                  \
     const search_params& params,                                    \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,         \
     const T* queries,                                               \
diff --git a/cpp/src/neighbors/ivf_flat_00_generate.py b/cpp/src/neighbors/ivf_flat_00_generate.py
index 44ea9709c2..b02606a23e 100644
--- a/cpp/src/neighbors/ivf_flat_00_generate.py
+++ b/cpp/src/neighbors/ivf_flat_00_generate.py
@@ -49,7 +49,7 @@
 build_macro = """
 #define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)        \\
   template auto raft::neighbors::ivf_flat::build<T, IdxT>( \\
-    raft::device_resources const& handle,                         \\
+    raft::resources const& handle,                         \\
     const raft::neighbors::ivf_flat::index_params& params,        \\
     const T* dataset,                                             \\
     IdxT n_rows,                                                  \\
@@ -57,13 +57,13 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \\
                                                                   \\
   template auto raft::neighbors::ivf_flat::build<T, IdxT>( \\
-    raft::device_resources const& handle,                         \\
+    raft::resources const& handle,                         \\
     const raft::neighbors::ivf_flat::index_params& params,        \\
     raft::device_matrix_view<const T, IdxT, row_major> dataset)   \\
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                  \\
                                                                   \\
   template void raft::neighbors::ivf_flat::build<T, IdxT>( \\
-    raft::device_resources const& handle,                         \\
+    raft::resources const& handle,                         \\
     const raft::neighbors::ivf_flat::index_params& params,        \\
     raft::device_matrix_view<const T, IdxT, row_major> dataset,   \\
     raft::neighbors::ivf_flat::index<T, IdxT>& idx);
@@ -72,7 +72,7 @@
 extend_macro = """
 #define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \\
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
-    raft::device_resources const& handle,                                  \\
+    raft::resources const& handle,                                  \\
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \\
     const T* new_vectors,                                                  \\
     const IdxT* new_indices,                                               \\
@@ -80,21 +80,21 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \\
                                                                            \\
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
-    raft::device_resources const& handle,                                  \\
+    raft::resources const& handle,                                  \\
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \\
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \\
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \\
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \\
                                                                            \\
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
-    raft::device_resources const& handle,                                  \\
+    raft::resources const& handle,                                  \\
     raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \\
     const T* new_vectors,                                                  \\
     const IdxT* new_indices,                                               \\
     IdxT n_rows);                                                          \\
                                                                            \\
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(         \\
-    raft::device_resources const& handle,                                  \\
+    raft::resources const& handle,                                  \\
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \\
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \\
     raft::neighbors::ivf_flat::index<T, IdxT>* index);
@@ -103,7 +103,7 @@
 search_macro = """
 #define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)        \\
   template void raft::neighbors::ivf_flat::search<T, IdxT>( \\
-    raft::device_resources const& handle,                          \\
+    raft::resources const& handle,                          \\
     const raft::neighbors::ivf_flat::search_params& params,        \\
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \\
     const T* queries,                                              \\
@@ -114,7 +114,7 @@
     rmm::mr::device_memory_resource* mr );                         \\
                                                                    \\
   template void raft::neighbors::ivf_flat::search<T, IdxT>( \\
-    raft::device_resources const& handle,                          \\
+    raft::resources const& handle,                          \\
     const raft::neighbors::ivf_flat::search_params& params,        \\
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,        \\
     raft::device_matrix_view<const T, IdxT, row_major> queries,    \\
diff --git a/cpp/src/neighbors/ivf_flat_build_float_int64_t.cu b/cpp/src/neighbors/ivf_flat_build_float_int64_t.cu
index 622f7c7d90..2ae795db56 100644
--- a/cpp/src/neighbors/ivf_flat_build_float_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_build_float_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)      \
   template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     const T* dataset,                                           \
     IdxT n_rows,                                                \
@@ -35,13 +35,13 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
                                                                 \
   template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     raft::device_matrix_view<const T, IdxT, row_major> dataset) \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
                                                                 \
   template void raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     raft::device_matrix_view<const T, IdxT, row_major> dataset, \
     raft::neighbors::ivf_flat::index<T, IdxT>& idx);
diff --git a/cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu
index 7b1eeae32d..deb31bf441 100644
--- a/cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_build_int8_t_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)      \
   template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     const T* dataset,                                           \
     IdxT n_rows,                                                \
@@ -35,13 +35,13 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
                                                                 \
   template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     raft::device_matrix_view<const T, IdxT, row_major> dataset) \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
                                                                 \
   template void raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     raft::device_matrix_view<const T, IdxT, row_major> dataset, \
     raft::neighbors::ivf_flat::index<T, IdxT>& idx);
diff --git a/cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
index 40cf28151f..402fdbab97 100644
--- a/cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_build(T, IdxT)      \
   template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     const T* dataset,                                           \
     IdxT n_rows,                                                \
@@ -35,13 +35,13 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
                                                                 \
   template auto raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     raft::device_matrix_view<const T, IdxT, row_major> dataset) \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                \
                                                                 \
   template void raft::neighbors::ivf_flat::build<T, IdxT>(      \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::index_params& params,      \
     raft::device_matrix_view<const T, IdxT, row_major> dataset, \
     raft::neighbors::ivf_flat::index<T, IdxT>& idx);
diff --git a/cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu b/cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu
index f7d99d7081..9e7701f773 100644
--- a/cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_extend_float_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
@@ -35,21 +35,21 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
     IdxT n_rows);                                                          \
                                                                            \
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     raft::neighbors::ivf_flat::index<T, IdxT>* index);
diff --git a/cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
index 9eec4f9648..5d3d23c3ab 100644
--- a/cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
@@ -35,21 +35,21 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
     IdxT n_rows);                                                          \
                                                                            \
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     raft::neighbors::ivf_flat::index<T, IdxT>* index);
diff --git a/cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
index fc24cbff74..3150a676eb 100644
--- a/cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_extend(T, IdxT)                \
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index,           \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
@@ -35,21 +35,21 @@
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   template auto raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)           \
     ->raft::neighbors::ivf_flat::index<T, IdxT>;                           \
                                                                            \
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::neighbors::ivf_flat::index<T, IdxT>* index,                      \
     const T* new_vectors,                                                  \
     const IdxT* new_indices,                                               \
     IdxT n_rows);                                                          \
                                                                            \
   template void raft::neighbors::ivf_flat::extend<T, IdxT>(                \
-    raft::device_resources const& handle,                                  \
+    raft::resources const& handle,                                         \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
     raft::neighbors::ivf_flat::index<T, IdxT>* index);
diff --git a/cpp/src/neighbors/ivf_flat_search_float_int64_t.cu b/cpp/src/neighbors/ivf_flat_search_float_int64_t.cu
index 5a1fae6d5a..03dcfee817 100644
--- a/cpp/src/neighbors/ivf_flat_search_float_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_search_float_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)     \
   template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::search_params& params,     \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
     const T* queries,                                           \
@@ -38,7 +38,7 @@
     rmm::mr::device_memory_resource* mr);                       \
                                                                 \
   template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::search_params& params,     \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
     raft::device_matrix_view<const T, IdxT, row_major> queries, \
diff --git a/cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu
index bc84159a41..7646081183 100644
--- a/cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_search_int8_t_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)     \
   template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::search_params& params,     \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
     const T* queries,                                           \
@@ -38,7 +38,7 @@
     rmm::mr::device_memory_resource* mr);                       \
                                                                 \
   template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::search_params& params,     \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
     raft::device_matrix_view<const T, IdxT, row_major> queries, \
diff --git a/cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
index 9e70e21af4..5d2effd385 100644
--- a/cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
@@ -27,7 +27,7 @@
 
 #define instantiate_raft_neighbors_ivf_flat_search(T, IdxT)     \
   template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::search_params& params,     \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
     const T* queries,                                           \
@@ -38,7 +38,7 @@
     rmm::mr::device_memory_resource* mr);                       \
                                                                 \
   template void raft::neighbors::ivf_flat::search<T, IdxT>(     \
-    raft::device_resources const& handle,                       \
+    raft::resources const& handle,                              \
     const raft::neighbors::ivf_flat::search_params& params,     \
     const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
     raft::device_matrix_view<const T, IdxT, row_major> queries, \
diff --git a/cpp/src/neighbors/ivfpq_build_float_int64_t.cu b/cpp/src/neighbors/ivfpq_build_float_int64_t.cu
index 6771964cae..8281abb62e 100644
--- a/cpp/src/neighbors/ivfpq_build_float_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_build_float_int64_t.cu
@@ -19,12 +19,12 @@
 
 #define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                 \
   template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
-    raft::device_resources const& handle,                                                \
+    raft::resources const& handle,                                                       \
     const raft::neighbors::ivf_pq::index_params& params,                                 \
     raft::device_matrix_view<const T, IdxT, row_major> dataset);                         \
                                                                                          \
   template auto raft::neighbors::ivf_pq::build(                                          \
-    raft::device_resources const& handle,                                                \
+    raft::resources const& handle,                                                       \
     const raft::neighbors::ivf_pq::index_params& params,                                 \
     const T* dataset,                                                                    \
     IdxT n_rows,                                                                         \
diff --git a/cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu
index 759045faa7..5f79ee3033 100644
--- a/cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_build_int8_t_int64_t.cu
@@ -19,12 +19,12 @@
 
 #define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                 \
   template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
-    raft::device_resources const& handle,                                                \
+    raft::resources const& handle,                                                       \
     const raft::neighbors::ivf_pq::index_params& params,                                 \
     raft::device_matrix_view<const T, IdxT, row_major> dataset);                         \
                                                                                          \
   template auto raft::neighbors::ivf_pq::build(                                          \
-    raft::device_resources const& handle,                                                \
+    raft::resources const& handle,                                                       \
     const raft::neighbors::ivf_pq::index_params& params,                                 \
     const T* dataset,                                                                    \
     IdxT n_rows,                                                                         \
diff --git a/cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu
index 62a47e9bcf..49866ba09a 100644
--- a/cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_build_uint8_t_int64_t.cu
@@ -19,12 +19,12 @@
 
 #define instantiate_raft_neighbors_ivf_pq_build(T, IdxT)                                 \
   template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::build<T, IdxT>( \
-    raft::device_resources const& handle,                                                \
+    raft::resources const& handle,                                                       \
     const raft::neighbors::ivf_pq::index_params& params,                                 \
     raft::device_matrix_view<const T, IdxT, row_major> dataset);                         \
                                                                                          \
   template auto raft::neighbors::ivf_pq::build(                                          \
-    raft::device_resources const& handle,                                                \
+    raft::resources const& handle,                                                       \
     const raft::neighbors::ivf_pq::index_params& params,                                 \
     const T* dataset,                                                                    \
     IdxT n_rows,                                                                         \
diff --git a/cpp/src/neighbors/ivfpq_extend_float_int64_t.cu b/cpp/src/neighbors/ivfpq_extend_float_int64_t.cu
index 3e728be38d..6ee6cb3879 100644
--- a/cpp/src/neighbors/ivfpq_extend_float_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_extend_float_int64_t.cu
@@ -19,19 +19,19 @@
 
 #define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                 \
   template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
     const raft::neighbors::ivf_pq::index<IdxT>& idx);                                     \
                                                                                           \
   template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
     raft::neighbors::ivf_pq::index<IdxT>* idx);                                           \
                                                                                           \
   template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,                                      \
     const T* new_vectors,                                                                 \
     const IdxT* new_indices,                                                              \
@@ -39,7 +39,7 @@
     ->raft::neighbors::ivf_pq::index<IdxT>;                                               \
                                                                                           \
   template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::neighbors::ivf_pq::index<IdxT>* idx,                                            \
     const T* new_vectors,                                                                 \
     const IdxT* new_indices,                                                              \
diff --git a/cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu
index 7853e53f63..aefeba2aa6 100644
--- a/cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_extend_int8_t_int64_t.cu
@@ -19,19 +19,19 @@
 
 #define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                 \
   template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
     const raft::neighbors::ivf_pq::index<IdxT>& idx);                                     \
                                                                                           \
   template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
     raft::neighbors::ivf_pq::index<IdxT>* idx);                                           \
                                                                                           \
   template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,                                      \
     const T* new_vectors,                                                                 \
     const IdxT* new_indices,                                                              \
@@ -39,7 +39,7 @@
     ->raft::neighbors::ivf_pq::index<IdxT>;                                               \
                                                                                           \
   template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::neighbors::ivf_pq::index<IdxT>* idx,                                            \
     const T* new_vectors,                                                                 \
     const IdxT* new_indices,                                                              \
diff --git a/cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
index 599a88fc67..e3a6dd365b 100644
--- a/cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
@@ -19,19 +19,19 @@
 
 #define instantiate_raft_neighbors_ivf_pq_extend(T, IdxT)                                 \
   template raft::neighbors::ivf_pq::index<IdxT> raft::neighbors::ivf_pq::extend<T, IdxT>( \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
     const raft::neighbors::ivf_pq::index<IdxT>& idx);                                     \
                                                                                           \
   template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                       \
     std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,     \
     raft::neighbors::ivf_pq::index<IdxT>* idx);                                           \
                                                                                           \
   template auto raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,                                      \
     const T* new_vectors,                                                                 \
     const IdxT* new_indices,                                                              \
@@ -39,7 +39,7 @@
     ->raft::neighbors::ivf_pq::index<IdxT>;                                               \
                                                                                           \
   template void raft::neighbors::ivf_pq::extend<T, IdxT>(                                 \
-    raft::device_resources const& handle,                                                 \
+    raft::resources const& handle,                                                        \
     raft::neighbors::ivf_pq::index<IdxT>* idx,                                            \
     const T* new_vectors,                                                                 \
     const IdxT* new_indices,                                                              \
diff --git a/cpp/src/neighbors/ivfpq_search_float_int64_t.cu b/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
index ab946d2b65..2bcbe22501 100644
--- a/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
@@ -19,7 +19,7 @@
 
 #define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
   template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     raft::device_matrix_view<const T, IdxT, row_major> queries,  \
@@ -27,7 +27,7 @@
     raft::device_matrix_view<float, IdxT, row_major> distances); \
                                                                  \
   template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     const T* queries,                                            \
diff --git a/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
index af54a9312a..74432c1963 100644
--- a/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
@@ -19,7 +19,7 @@
 
 #define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
   template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     raft::device_matrix_view<const T, IdxT, row_major> queries,  \
@@ -27,7 +27,7 @@
     raft::device_matrix_view<float, IdxT, row_major> distances); \
                                                                  \
   template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     const T* queries,                                            \
diff --git a/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
index 7b49487506..8a05263ca0 100644
--- a/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
@@ -19,7 +19,7 @@
 
 #define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
   template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     raft::device_matrix_view<const T, IdxT, row_major> queries,  \
@@ -27,7 +27,7 @@
     raft::device_matrix_view<float, IdxT, row_major> distances); \
                                                                  \
   template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::device_resources const& handle,                        \
+    raft::resources const& handle,                               \
     const raft::neighbors::ivf_pq::search_params& params,        \
     const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
     const T* queries,                                            \
diff --git a/cpp/src/neighbors/refine_00_generate.py b/cpp/src/neighbors/refine_00_generate.py
index 18c8857e3f..8e28092309 100644
--- a/cpp/src/neighbors/refine_00_generate.py
+++ b/cpp/src/neighbors/refine_00_generate.py
@@ -42,7 +42,7 @@
 
 #define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)       \\
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(        \\
-    raft::device_resources const& handle,                                              \\
+    raft::resources const& handle,                                                     \\
     raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,             \\
     raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,             \\
     raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,  \\
@@ -51,7 +51,7 @@
     raft::distance::DistanceType metric);                                              \\
                                                                                        \\
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(        \\
-    raft::device_resources const& handle,                                              \\
+    raft::resources const& handle,                                                     \\
     raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,               \\
     raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,               \\
     raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,    \\
diff --git a/cpp/src/neighbors/refine_float_float.cu b/cpp/src/neighbors/refine_float_float.cu
index 7e811fd7e3..ea6892d2c5 100644
--- a/cpp/src/neighbors/refine_float_float.cu
+++ b/cpp/src/neighbors/refine_float_float.cu
@@ -28,7 +28,7 @@
 
 #define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)      \
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
-    raft::device_resources const& handle,                                             \
+    raft::resources const& handle,                                                    \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,            \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,            \
     raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, \
@@ -37,7 +37,7 @@
     raft::distance::DistanceType metric);                                             \
                                                                                       \
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
-    raft::device_resources const& handle,                                             \
+    raft::resources const& handle,                                                    \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,              \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,              \
     raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,   \
diff --git a/cpp/src/neighbors/refine_int8_t_float.cu b/cpp/src/neighbors/refine_int8_t_float.cu
index 6983c2492c..6ed1f86db3 100644
--- a/cpp/src/neighbors/refine_int8_t_float.cu
+++ b/cpp/src/neighbors/refine_int8_t_float.cu
@@ -28,7 +28,7 @@
 
 #define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)      \
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
-    raft::device_resources const& handle,                                             \
+    raft::resources const& handle,                                                    \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,            \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,            \
     raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, \
@@ -37,7 +37,7 @@
     raft::distance::DistanceType metric);                                             \
                                                                                       \
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
-    raft::device_resources const& handle,                                             \
+    raft::resources const& handle,                                                    \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,              \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,              \
     raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,   \
diff --git a/cpp/src/neighbors/refine_uint8_t_float.cu b/cpp/src/neighbors/refine_uint8_t_float.cu
index f61bc508c0..dac3c68b9f 100644
--- a/cpp/src/neighbors/refine_uint8_t_float.cu
+++ b/cpp/src/neighbors/refine_uint8_t_float.cu
@@ -28,7 +28,7 @@
 
 #define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx)      \
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
-    raft::device_resources const& handle,                                             \
+    raft::resources const& handle,                                                    \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset,            \
     raft::device_matrix_view<const data_t, matrix_idx, row_major> queries,            \
     raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, \
@@ -37,7 +37,7 @@
     raft::distance::DistanceType metric);                                             \
                                                                                       \
   template void raft::neighbors::refine<idx_t, data_t, distance_t, matrix_idx>(       \
-    raft::device_resources const& handle,                                             \
+    raft::resources const& handle,                                                    \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset,              \
     raft::host_matrix_view<const data_t, matrix_idx, row_major> queries,              \
     raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates,   \
diff --git a/cpp/src/raft_runtime/cluster/cluster_cost.cuh b/cpp/src/raft_runtime/cluster/cluster_cost.cuh
index be7fa521aa..325a460ab9 100644
--- a/cpp/src/raft_runtime/cluster/cluster_cost.cuh
+++ b/cpp/src/raft_runtime/cluster/cluster_cost.cuh
@@ -15,15 +15,17 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
 #include <raft/util/cuda_utils.cuh>
 
 namespace raft::runtime::cluster::kmeans {
 template <typename ElementType, typename IndexType>
-void cluster_cost(raft::device_resources const& handle,
+void cluster_cost(raft::resources const& handle,
                   const ElementType* X,
                   IndexType n_samples,
                   IndexType n_features,
@@ -31,19 +33,25 @@ void cluster_cost(raft::device_resources const& handle,
                   const ElementType* centroids,
                   ElementType* cost)
 {
-  rmm::device_uvector<char> workspace(n_samples * sizeof(IndexType), handle.get_stream());
+  rmm::device_uvector<char> workspace(n_samples * sizeof(IndexType),
+                                      resource::get_cuda_stream(handle));
 
-  rmm::device_uvector<ElementType> x_norms(n_samples, handle.get_stream());
-  rmm::device_uvector<ElementType> centroid_norms(n_clusters, handle.get_stream());
-  raft::linalg::rowNorm(
-    x_norms.data(), X, n_features, n_samples, raft::linalg::L2Norm, true, handle.get_stream());
+  rmm::device_uvector<ElementType> x_norms(n_samples, resource::get_cuda_stream(handle));
+  rmm::device_uvector<ElementType> centroid_norms(n_clusters, resource::get_cuda_stream(handle));
+  raft::linalg::rowNorm(x_norms.data(),
+                        X,
+                        n_features,
+                        n_samples,
+                        raft::linalg::L2Norm,
+                        true,
+                        resource::get_cuda_stream(handle));
   raft::linalg::rowNorm(centroid_norms.data(),
                         centroids,
                         n_features,
                         n_clusters,
                         raft::linalg::L2Norm,
                         true,
-                        handle.get_stream());
+                        resource::get_cuda_stream(handle));
 
   auto min_cluster_distance =
     raft::make_device_vector<raft::KeyValuePair<IndexType, ElementType>>(handle, n_samples);
@@ -58,22 +66,22 @@ void cluster_cost(raft::device_resources const& handle,
                                      (void*)workspace.data(),
                                      false,
                                      true,
-                                     handle.get_stream());
+                                     resource::get_cuda_stream(handle));
 
   auto distances = raft::make_device_vector<ElementType, IndexType>(handle, n_samples);
-  thrust::transform(handle.get_thrust_policy(),
+  thrust::transform(resource::get_thrust_policy(handle),
                     min_cluster_distance.data_handle(),
                     min_cluster_distance.data_handle() + n_samples,
                     distances.data_handle(),
                     raft::value_op{});
 
-  rmm::device_scalar<ElementType> device_cost(0, handle.get_stream());
+  rmm::device_scalar<ElementType> device_cost(0, resource::get_cuda_stream(handle));
   raft::cluster::kmeans::cluster_cost(handle,
                                       distances.view(),
                                       workspace,
                                       make_device_scalar_view<ElementType>(device_cost.data()),
                                       raft::add_op{});
 
-  raft::update_host(cost, device_cost.data(), 1, handle.get_stream());
+  raft::update_host(cost, device_cost.data(), 1, resource::get_cuda_stream(handle));
 }
 }  // namespace raft::runtime::cluster::kmeans
diff --git a/cpp/src/raft_runtime/cluster/cluster_cost_double.cu b/cpp/src/raft_runtime/cluster/cluster_cost_double.cu
index b6df92c839..ab0299c5d0 100644
--- a/cpp/src/raft_runtime/cluster/cluster_cost_double.cu
+++ b/cpp/src/raft_runtime/cluster/cluster_cost_double.cu
@@ -15,12 +15,12 @@
  */
 
 #include "cluster_cost.cuh"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void cluster_cost(raft::device_resources const& handle,
+void cluster_cost(raft::resources const& handle,
                   const double* X,
                   int n_samples,
                   int n_features,
diff --git a/cpp/src/raft_runtime/cluster/cluster_cost_float.cu b/cpp/src/raft_runtime/cluster/cluster_cost_float.cu
index 2c26b69984..22cfa1b0ff 100644
--- a/cpp/src/raft_runtime/cluster/cluster_cost_float.cu
+++ b/cpp/src/raft_runtime/cluster/cluster_cost_float.cu
@@ -15,12 +15,12 @@
  */
 
 #include "cluster_cost.cuh"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void cluster_cost(raft::device_resources const& handle,
+void cluster_cost(raft::resources const& handle,
                   const float* X,
                   int n_samples,
                   int n_features,
diff --git a/cpp/src/raft_runtime/cluster/kmeans_fit_double.cu b/cpp/src/raft_runtime/cluster/kmeans_fit_double.cu
index 0b8b458042..0711f6c974 100644
--- a/cpp/src/raft_runtime/cluster/kmeans_fit_double.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_fit_double.cu
@@ -15,11 +15,11 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void fit(raft::device_resources const& handle,
+void fit(raft::resources const& handle,
          const raft::cluster::kmeans::KMeansParams& params,
          raft::device_matrix_view<const double, int> X,
          std::optional<raft::device_vector_view<const double, int>> sample_weight,
diff --git a/cpp/src/raft_runtime/cluster/kmeans_fit_float.cu b/cpp/src/raft_runtime/cluster/kmeans_fit_float.cu
index a2831c2cf0..f98a87d906 100644
--- a/cpp/src/raft_runtime/cluster/kmeans_fit_float.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_fit_float.cu
@@ -15,11 +15,11 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void fit(raft::device_resources const& handle,
+void fit(raft::resources const& handle,
          const raft::cluster::kmeans::KMeansParams& params,
          raft::device_matrix_view<const float, int> X,
          std::optional<raft::device_vector_view<const float, int>> sample_weight,
diff --git a/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
index d2ec26f882..6c7563e457 100644
--- a/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
@@ -15,16 +15,17 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void init_plus_plus(raft::device_resources const& handle,
+void init_plus_plus(raft::resources const& handle,
                     const raft::cluster::kmeans::KMeansParams& params,
                     raft::device_matrix_view<const double, int> X,
                     raft::device_matrix_view<double, int> centroids)
 {
-  rmm::device_uvector<char> workspace(0, handle.get_stream());
+  rmm::device_uvector<char> workspace(0, resource::get_cuda_stream(handle));
   raft::cluster::kmeans::init_plus_plus<double, int>(handle, params, X, centroids, workspace);
 }
 }  // namespace raft::runtime::cluster::kmeans
diff --git a/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
index bacab3b7d6..99894f4ef7 100644
--- a/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
+++ b/cpp/src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
@@ -15,16 +15,17 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void init_plus_plus(raft::device_resources const& handle,
+void init_plus_plus(raft::resources const& handle,
                     const raft::cluster::kmeans::KMeansParams& params,
                     raft::device_matrix_view<const float, int> X,
                     raft::device_matrix_view<float, int> centroids)
 {
-  rmm::device_uvector<char> workspace(0, handle.get_stream());
+  rmm::device_uvector<char> workspace(0, resource::get_cuda_stream(handle));
   raft::cluster::kmeans::init_plus_plus<float, int>(handle, params, X, centroids, workspace);
 }
 }  // namespace raft::runtime::cluster::kmeans
diff --git a/cpp/src/raft_runtime/cluster/update_centroids.cuh b/cpp/src/raft_runtime/cluster/update_centroids.cuh
index de219329df..e0dec4bdcf 100644
--- a/cpp/src/raft_runtime/cluster/update_centroids.cuh
+++ b/cpp/src/raft_runtime/cluster/update_centroids.cuh
@@ -15,14 +15,16 @@
  */
 
 #include <raft/cluster/kmeans.cuh>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/norm.cuh>
 
 namespace raft::runtime::cluster::kmeans {
 
 template <typename DataT, typename IndexT>
-void update_centroids(raft::device_resources const& handle,
+void update_centroids(raft::resources const& handle,
                       const DataT* X,
                       int n_samples,
                       int n_features,
@@ -37,11 +39,11 @@ void update_centroids(raft::device_resources const& handle,
   auto centroids_view =
     raft::make_device_matrix_view<const DataT, IndexT>(centroids, n_clusters, n_features);
 
-  rmm::device_uvector<DataT> sample_weights_uvec(0, handle.get_stream());
+  rmm::device_uvector<DataT> sample_weights_uvec(0, resource::get_cuda_stream(handle));
   if (sample_weights == nullptr) {
-    sample_weights_uvec.resize(n_samples, handle.get_stream());
+    sample_weights_uvec.resize(n_samples, resource::get_cuda_stream(handle));
     DataT weight = 1.0 / n_samples;
-    thrust::fill(handle.get_thrust_policy(),
+    thrust::fill(resource::get_thrust_policy(handle),
                  sample_weights_uvec.data(),
                  sample_weights_uvec.data() + n_samples,
                  weight);
@@ -51,9 +53,9 @@ void update_centroids(raft::device_resources const& handle,
 
   auto new_centroids_view =
     raft::make_device_matrix_view<DataT, IndexT>(new_centroids, n_clusters, n_features);
-  rmm::device_uvector<DataT> weight_per_cluster_uvec(0, handle.get_stream());
+  rmm::device_uvector<DataT> weight_per_cluster_uvec(0, resource::get_cuda_stream(handle));
   if (weight_per_cluster == nullptr) {
-    weight_per_cluster_uvec.resize(n_clusters, handle.get_stream());
+    weight_per_cluster_uvec.resize(n_clusters, resource::get_cuda_stream(handle));
   }
   auto weight_per_cluster_view = raft::make_device_vector_view<DataT, IndexT>(
     weight_per_cluster == nullptr ? weight_per_cluster_uvec.data() : weight_per_cluster,
diff --git a/cpp/src/raft_runtime/cluster/update_centroids_double.cu b/cpp/src/raft_runtime/cluster/update_centroids_double.cu
index d967c503ff..18f618eed0 100644
--- a/cpp/src/raft_runtime/cluster/update_centroids_double.cu
+++ b/cpp/src/raft_runtime/cluster/update_centroids_double.cu
@@ -15,12 +15,12 @@
  */
 
 #include "update_centroids.cuh"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void update_centroids(raft::device_resources const& handle,
+void update_centroids(raft::resources const& handle,
                       const double* X,
                       int n_samples,
                       int n_features,
diff --git a/cpp/src/raft_runtime/cluster/update_centroids_float.cu b/cpp/src/raft_runtime/cluster/update_centroids_float.cu
index b141a1ef20..31bb42a661 100644
--- a/cpp/src/raft_runtime/cluster/update_centroids_float.cu
+++ b/cpp/src/raft_runtime/cluster/update_centroids_float.cu
@@ -15,12 +15,12 @@
  */
 
 #include "update_centroids.cuh"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 
 namespace raft::runtime::cluster::kmeans {
 
-void update_centroids(raft::device_resources const& handle,
+void update_centroids(raft::resources const& handle,
                       const float* X,
                       int n_samples,
                       int n_features,
diff --git a/cpp/src/raft_runtime/distance/fused_l2_min_arg.cu b/cpp/src/raft_runtime/distance/fused_l2_min_arg.cu
index bec71ae698..d8949a645b 100644
--- a/cpp/src/raft_runtime/distance/fused_l2_min_arg.cu
+++ b/cpp/src/raft_runtime/distance/fused_l2_min_arg.cu
@@ -15,8 +15,10 @@
  */
 
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/kvp.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/fused_l2_nn.cuh>
 #include <raft/linalg/norm.cuh>
@@ -35,7 +37,7 @@ struct KeyValueIndexOp {
 };
 
 template <typename value_t, typename idx_t>
-void compute_fused_l2_nn_min_arg(raft::device_resources const& handle,
+void compute_fused_l2_nn_min_arg(raft::resources const& handle,
                                  idx_t* min,
                                  const value_t* x,
                                  const value_t* y,
@@ -44,13 +46,15 @@ void compute_fused_l2_nn_min_arg(raft::device_resources const& handle,
                                  idx_t k,
                                  bool sqrt)
 {
-  rmm::device_uvector<int> workspace(m, handle.get_stream());
+  rmm::device_uvector<int> workspace(m, resource::get_cuda_stream(handle));
   auto kvp = raft::make_device_vector<raft::KeyValuePair<idx_t, value_t>>(handle, m);
 
-  rmm::device_uvector<value_t> x_norms(m, handle.get_stream());
-  rmm::device_uvector<value_t> y_norms(n, handle.get_stream());
-  raft::linalg::rowNorm(x_norms.data(), x, k, m, raft::linalg::L2Norm, true, handle.get_stream());
-  raft::linalg::rowNorm(y_norms.data(), y, k, n, raft::linalg::L2Norm, true, handle.get_stream());
+  rmm::device_uvector<value_t> x_norms(m, resource::get_cuda_stream(handle));
+  rmm::device_uvector<value_t> y_norms(n, resource::get_cuda_stream(handle));
+  raft::linalg::rowNorm(
+    x_norms.data(), x, k, m, raft::linalg::L2Norm, true, resource::get_cuda_stream(handle));
+  raft::linalg::rowNorm(
+    y_norms.data(), y, k, n, raft::linalg::L2Norm, true, resource::get_cuda_stream(handle));
 
   raft::distance::fusedL2NNMinReduce(kvp.data_handle(),
                                      x,
@@ -63,15 +67,18 @@ void compute_fused_l2_nn_min_arg(raft::device_resources const& handle,
                                      (void*)workspace.data(),
                                      sqrt,
                                      true,
-                                     handle.get_stream());
+                                     resource::get_cuda_stream(handle));
 
   KeyValueIndexOp<idx_t, value_t> conversion_op;
-  thrust::transform(
-    handle.get_thrust_policy(), kvp.data_handle(), kvp.data_handle() + m, min, conversion_op);
-  handle.sync_stream();
+  thrust::transform(resource::get_thrust_policy(handle),
+                    kvp.data_handle(),
+                    kvp.data_handle() + m,
+                    min,
+                    conversion_op);
+  resource::sync_stream(handle);
 }
 
-void fused_l2_nn_min_arg(raft::device_resources const& handle,
+void fused_l2_nn_min_arg(raft::resources const& handle,
                          int* min,
                          const float* x,
                          const float* y,
@@ -83,7 +90,7 @@ void fused_l2_nn_min_arg(raft::device_resources const& handle,
   compute_fused_l2_nn_min_arg<float, int>(handle, min, x, y, m, n, k, sqrt);
 }
 
-void fused_l2_nn_min_arg(raft::device_resources const& handle,
+void fused_l2_nn_min_arg(raft::resources const& handle,
                          int* min,
                          const double* x,
                          const double* y,
diff --git a/cpp/src/raft_runtime/distance/pairwise_distance.cu b/cpp/src/raft_runtime/distance/pairwise_distance.cu
index 62597a4799..868a243b02 100644
--- a/cpp/src/raft_runtime/distance/pairwise_distance.cu
+++ b/cpp/src/raft_runtime/distance/pairwise_distance.cu
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>
 
 namespace raft::runtime::distance {
 
-void pairwise_distance(raft::device_resources const& handle,
+void pairwise_distance(raft::resources const& handle,
                        float* x,
                        float* y,
                        float* dists,
@@ -35,7 +35,7 @@ void pairwise_distance(raft::device_resources const& handle,
     handle, x, y, dists, m, n, k, metric, isRowMajor, metric_arg);
 }
 
-void pairwise_distance(raft::device_resources const& handle,
+void pairwise_distance(raft::resources const& handle,
                        double* x,
                        double* y,
                        double* dists,
diff --git a/cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu b/cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu
index 8814a8aafc..551a51f6b6 100644
--- a/cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu
+++ b/cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu
@@ -15,7 +15,7 @@
  */
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/select_k.cuh>
 
 #include <raft_runtime/matrix/select_k.hpp>
@@ -24,7 +24,7 @@
 
 namespace raft::runtime::matrix {
 
-void select_k(const device_resources& handle,
+void select_k(const resources& handle,
               raft::device_matrix_view<const float, int64_t, row_major> in_val,
               std::optional<raft::device_matrix_view<const int64_t, int64_t, row_major>> in_idx,
               raft::device_matrix_view<float, int64_t, row_major> out_val,
diff --git a/cpp/src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu b/cpp/src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
index ea6002eab0..3752e9218e 100644
--- a/cpp/src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
+++ b/cpp/src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
@@ -15,7 +15,7 @@
  */
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/neighbors/brute_force.cuh>
 
 #include <raft_runtime/neighbors/brute_force.hpp>
@@ -25,7 +25,7 @@
 namespace raft::runtime::neighbors::brute_force {
 
 #define RAFT_INST_BFKNN(IDX_T, DATA_T, MATRIX_IDX_T, INDEX_LAYOUT, SEARCH_LAYOUT)        \
-  void knn(raft::device_resources const& handle,                                         \
+  void knn(raft::resources const& handle,                                                \
            raft::device_matrix_view<const DATA_T, MATRIX_IDX_T, INDEX_LAYOUT> index,     \
            raft::device_matrix_view<const DATA_T, MATRIX_IDX_T, SEARCH_LAYOUT> search,   \
            raft::device_matrix_view<IDX_T, MATRIX_IDX_T, row_major> indices,             \
diff --git a/cpp/src/raft_runtime/neighbors/ivf_flat_build.cu b/cpp/src/raft_runtime/neighbors/ivf_flat_build.cu
index 48a40ab56e..7fccb95411 100644
--- a/cpp/src/raft_runtime/neighbors/ivf_flat_build.cu
+++ b/cpp/src/raft_runtime/neighbors/ivf_flat_build.cu
@@ -20,14 +20,14 @@
 namespace raft::runtime::neighbors::ivf_flat {
 
 #define RAFT_INST_BUILD_EXTEND(T, IdxT)                                                \
-  auto build(raft::device_resources const& handle,                                     \
+  auto build(raft::resources const& handle,                                            \
              const raft::neighbors::ivf_flat::index_params& params,                    \
              raft::device_matrix_view<const T, IdxT, row_major> dataset)               \
     ->raft::neighbors::ivf_flat::index<T, IdxT>                                        \
   {                                                                                    \
     return raft::neighbors::ivf_flat::build<T, IdxT>(handle, params, dataset);         \
   }                                                                                    \
-  auto extend(raft::device_resources const& handle,                                    \
+  auto extend(raft::resources const& handle,                                           \
               raft::device_matrix_view<const T, IdxT, row_major> new_vectors,          \
               std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,   \
               const raft::neighbors::ivf_flat::index<T, IdxT>& orig_index)             \
@@ -37,7 +37,7 @@ namespace raft::runtime::neighbors::ivf_flat {
       handle, new_vectors, new_indices, orig_index);                                   \
   }                                                                                    \
                                                                                        \
-  void build(raft::device_resources const& handle,                                     \
+  void build(raft::resources const& handle,                                            \
              const raft::neighbors::ivf_flat::index_params& params,                    \
              raft::device_matrix_view<const T, IdxT, row_major> dataset,               \
              raft::neighbors::ivf_flat::index<T, IdxT>& idx)                           \
@@ -45,7 +45,7 @@ namespace raft::runtime::neighbors::ivf_flat {
     idx = build(handle, params, dataset);                                              \
   }                                                                                    \
                                                                                        \
-  void extend(raft::device_resources const& handle,                                    \
+  void extend(raft::resources const& handle,                                           \
               raft::device_matrix_view<const T, IdxT, row_major> new_vectors,          \
               std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,   \
               raft::neighbors::ivf_flat::index<T, IdxT>* idx)                          \
diff --git a/cpp/src/raft_runtime/neighbors/ivf_flat_search.cu b/cpp/src/raft_runtime/neighbors/ivf_flat_search.cu
index eefc7f2932..7a156a4e02 100644
--- a/cpp/src/raft_runtime/neighbors/ivf_flat_search.cu
+++ b/cpp/src/raft_runtime/neighbors/ivf_flat_search.cu
@@ -20,7 +20,7 @@
 namespace raft::runtime::neighbors::ivf_flat {
 
 #define RAFT_INST_SEARCH(T, IdxT)                                         \
-  void search(raft::device_resources const& handle,                       \
+  void search(raft::resources const& handle,                              \
               raft::neighbors::ivf_flat::search_params const& params,     \
               const raft::neighbors::ivf_flat::index<T, IdxT>& index,     \
               raft::device_matrix_view<const T, IdxT, row_major> queries, \
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_build.cu b/cpp/src/raft_runtime/neighbors/ivfpq_build.cu
index 5bfb546060..65a29429bc 100644
--- a/cpp/src/raft_runtime/neighbors/ivfpq_build.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_build.cu
@@ -21,13 +21,13 @@ namespace raft::runtime::neighbors::ivf_pq {
 
 #define RAFT_INST_BUILD_EXTEND(T, IdxT)                                                     \
   raft::neighbors::ivf_pq::index<IdxT> build(                                               \
-    raft::device_resources const& handle,                                                   \
+    raft::resources const& handle,                                                          \
     const raft::neighbors::ivf_pq::index_params& params,                                    \
     raft::device_matrix_view<const T, IdxT, row_major> dataset)                             \
   {                                                                                         \
     return raft::neighbors::ivf_pq::build<T, IdxT>(handle, params, dataset);                \
   }                                                                                         \
-  void build(raft::device_resources const& handle,                                          \
+  void build(raft::resources const& handle,                                                 \
              const raft::neighbors::ivf_pq::index_params& params,                           \
              raft::device_matrix_view<const T, IdxT, row_major> dataset,                    \
              raft::neighbors::ivf_pq::index<IdxT>* idx)                                     \
@@ -35,14 +35,14 @@ namespace raft::runtime::neighbors::ivf_pq {
     *idx = raft::neighbors::ivf_pq::build<T, IdxT>(handle, params, dataset);                \
   }                                                                                         \
   raft::neighbors::ivf_pq::index<IdxT> extend(                                              \
-    raft::device_resources const& handle,                                                   \
+    raft::resources const& handle,                                                          \
     raft::device_matrix_view<const T, IdxT, row_major> new_vectors,                         \
     std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,                  \
     const raft::neighbors::ivf_pq::index<IdxT>& idx)                                        \
   {                                                                                         \
     return raft::neighbors::ivf_pq::extend<T, IdxT>(handle, new_vectors, new_indices, idx); \
   }                                                                                         \
-  void extend(raft::device_resources const& handle,                                         \
+  void extend(raft::resources const& handle,                                                \
               raft::device_matrix_view<const T, IdxT, row_major> new_vectors,               \
               std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices,        \
               raft::neighbors::ivf_pq::index<IdxT>* idx)                                    \
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_deserialize.cu b/cpp/src/raft_runtime/neighbors/ivfpq_deserialize.cu
index 45b731fdcf..7a2383281e 100644
--- a/cpp/src/raft_runtime/neighbors/ivfpq_deserialize.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_deserialize.cu
@@ -21,7 +21,7 @@
 
 namespace raft::runtime::neighbors::ivf_pq {
 
-void deserialize(raft::device_resources const& handle,
+void deserialize(raft::resources const& handle,
                  const std::string& filename,
                  raft::neighbors::ivf_pq::index<int64_t>* index)
 {
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu b/cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
index d55d726671..22e4b64387 100644
--- a/cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
@@ -21,7 +21,7 @@
 namespace raft::runtime::neighbors::ivf_pq {
 
 #define RAFT_SEARCH_INST(T, IdxT)                                                                 \
-  void search(raft::device_resources const& handle,                                               \
+  void search(raft::resources const& handle,                                                      \
               const raft::neighbors::ivf_pq::search_params& params,                               \
               const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
               raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu b/cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
index b73cbc0751..db7b3ce209 100644
--- a/cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
@@ -21,7 +21,7 @@
 namespace raft::runtime::neighbors::ivf_pq {
 
 #define RAFT_SEARCH_INST(T, IdxT)                                                                 \
-  void search(raft::device_resources const& handle,                                               \
+  void search(raft::resources const& handle,                                                      \
               const raft::neighbors::ivf_pq::search_params& params,                               \
               const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
               raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu b/cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
index 2b3dfe585d..6a9a2888e0 100644
--- a/cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
@@ -21,7 +21,7 @@
 namespace raft::runtime::neighbors::ivf_pq {
 
 #define RAFT_SEARCH_INST(T, IdxT)                                                                 \
-  void search(raft::device_resources const& handle,                                               \
+  void search(raft::resources const& handle,                                                      \
               const raft::neighbors::ivf_pq::search_params& params,                               \
               const raft::neighbors::ivf_pq::index<IdxT>& idx,                                    \
               raft::device_matrix_view<const T, IdxT, row_major> queries,                         \
diff --git a/cpp/src/raft_runtime/neighbors/ivfpq_serialize.cu b/cpp/src/raft_runtime/neighbors/ivfpq_serialize.cu
index 21bd221c45..9dea8a3b60 100644
--- a/cpp/src/raft_runtime/neighbors/ivfpq_serialize.cu
+++ b/cpp/src/raft_runtime/neighbors/ivfpq_serialize.cu
@@ -21,7 +21,7 @@
 
 namespace raft::runtime::neighbors::ivf_pq {
 
-void serialize(raft::device_resources const& handle,
+void serialize(raft::resources const& handle,
                const std::string& filename,
                const raft::neighbors::ivf_pq::index<int64_t>& index)
 {
diff --git a/cpp/src/raft_runtime/neighbors/refine_d_int64_t_float.cu b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_float.cu
index 79cec55294..a146eba875 100644
--- a/cpp/src/raft_runtime/neighbors/refine_d_int64_t_float.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_float.cu
@@ -18,7 +18,7 @@
 
 namespace raft::runtime::neighbors {
 
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::device_matrix_view<const float, int64_t, row_major> dataset,
             raft::device_matrix_view<const float, int64_t, row_major> queries,
             raft::device_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
diff --git a/cpp/src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
index f8a7a8c9c8..c840acf3df 100644
--- a/cpp/src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
@@ -18,7 +18,7 @@
 
 namespace raft::runtime::neighbors {
 
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::device_matrix_view<const int8_t, int64_t, row_major> dataset,
             raft::device_matrix_view<const int8_t, int64_t, row_major> queries,
             raft::device_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
diff --git a/cpp/src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
index 8f68f9f88e..6ad8d9a38c 100644
--- a/cpp/src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
@@ -18,7 +18,7 @@
 
 namespace raft::runtime::neighbors {
 
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::device_matrix_view<const uint8_t, int64_t, row_major> dataset,
             raft::device_matrix_view<const uint8_t, int64_t, row_major> queries,
             raft::device_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
diff --git a/cpp/src/raft_runtime/neighbors/refine_h_int64_t_float.cu b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_float.cu
index 7f19d44700..3d186c017c 100644
--- a/cpp/src/raft_runtime/neighbors/refine_h_int64_t_float.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_float.cu
@@ -19,7 +19,7 @@
 
 namespace raft::runtime::neighbors {
 
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::host_matrix_view<const float, int64_t, row_major> dataset,
             raft::host_matrix_view<const float, int64_t, row_major> queries,
             raft::host_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
diff --git a/cpp/src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
index bd21c6b198..93237d11d5 100644
--- a/cpp/src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
@@ -18,7 +18,7 @@
 
 namespace raft::runtime::neighbors {
 
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::host_matrix_view<const int8_t, int64_t, row_major> dataset,
             raft::host_matrix_view<const int8_t, int64_t, row_major> queries,
             raft::host_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
diff --git a/cpp/src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
index f10d01cc09..91771e171f 100644
--- a/cpp/src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
+++ b/cpp/src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
@@ -18,7 +18,7 @@
 
 namespace raft::runtime::neighbors {
 
-void refine(raft::device_resources const& handle,
+void refine(raft::resources const& handle,
             raft::host_matrix_view<const uint8_t, int64_t, row_major> dataset,
             raft::host_matrix_view<const uint8_t, int64_t, row_major> queries,
             raft::host_matrix_view<const int64_t, int64_t, row_major> neighbor_candidates,
diff --git a/cpp/src/raft_runtime/random/common.cuh b/cpp/src/raft_runtime/random/common.cuh
index 69b507b07b..f5f8a7c0ae 100644
--- a/cpp/src/raft_runtime/random/common.cuh
+++ b/cpp/src/raft_runtime/random/common.cuh
@@ -14,20 +14,28 @@
  * limitations under the License.
  */
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rmat_rectangular_generator.cuh>
 #include <raft_runtime/random/rmat_rectangular_generator.hpp>
 
-#define FUNC_DEF(IdxT, ProbT)                                                           \
-  void rmat_rectangular_gen(raft::device_resources const& handle,                       \
-                            IdxT* out,                                                  \
-                            IdxT* out_src,                                              \
-                            IdxT* out_dst,                                              \
-                            const ProbT* theta,                                         \
-                            IdxT r_scale,                                               \
-                            IdxT c_scale,                                               \
-                            IdxT n_edges,                                               \
-                            raft::random::RngState& r)                                  \
-  {                                                                                     \
-    raft::random::rmat_rectangular_gen<IdxT, ProbT>(                                    \
-      out, out_src, out_dst, theta, r_scale, c_scale, n_edges, handle.get_stream(), r); \
+#define FUNC_DEF(IdxT, ProbT)                                                          \
+  void rmat_rectangular_gen(raft::resources const& handle,                             \
+                            IdxT* out,                                                 \
+                            IdxT* out_src,                                             \
+                            IdxT* out_dst,                                             \
+                            const ProbT* theta,                                        \
+                            IdxT r_scale,                                              \
+                            IdxT c_scale,                                              \
+                            IdxT n_edges,                                              \
+                            raft::random::RngState& r)                                 \
+  {                                                                                    \
+    raft::random::rmat_rectangular_gen<IdxT, ProbT>(out,                               \
+                                                    out_src,                           \
+                                                    out_dst,                           \
+                                                    theta,                             \
+                                                    r_scale,                           \
+                                                    c_scale,                           \
+                                                    n_edges,                           \
+                                                    resource::get_cuda_stream(handle), \
+                                                    r);                                \
   }
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers.cu b/cpp/src/spatial/knn/detail/ball_cover/registers.cu
index 0bb6d123a9..493a602362 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers.cu
@@ -20,7 +20,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims)                                                   \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
@@ -37,7 +37,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims)                                                   \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py b/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py
index f8ce27728b..d7b6e618fd 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py
@@ -48,7 +48,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \\
   template void                                                                       \\
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \\
-    raft::device_resources const& handle,                                                    \\
+    raft::resources const& handle,                                                    \\
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \\
     const Mvalue_t* query,                                                                   \\
     const Mvalue_int n_query_rows,                                                           \\
@@ -68,7 +68,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \\
   template void                                                                       \\
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \\
-    raft::device_resources const& handle,                                                    \\
+    raft::resources const& handle,                                                    \\
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \\
     const Mvalue_t* query,                                                                   \\
     const Mvalue_int n_query_rows,                                                           \\
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
index b4ecac06e6..bb9ec284cc 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
index 31628d8b82..2b06d0a1cd 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
index 80fda1bf9d..6f4e4061ac 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
index 40aa89aa39..aa407eeb20 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
index be159932a6..7918fb79cb 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
index a9fe8f355f..f8f29a107c 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_one<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
index b20df46a4f..1facd24510 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
index d5042b0142..6e681e2e9b 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
index 01002d356e..b4a038ffd7 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
index 5746ab99fb..bcb27568c1 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
index fad007a2d4..e40d837862 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
index 93083da5c6..8a362bcf16 100644
--- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
+++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
@@ -30,7 +30,7 @@
   Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func)                                       \
   template void                                                                              \
   raft::spatial::knn::detail::rbc_low_dim_pass_two<Mvalue_idx, Mvalue_t, Mvalue_int, Mdims>( \
-    raft::device_resources const& handle,                                                    \
+    raft::resources const& handle,                                                           \
     const BallCoverIndex<Mvalue_idx, Mvalue_t, Mvalue_int>& index,                           \
     const Mvalue_t* query,                                                                   \
     const Mvalue_int n_query_rows,                                                           \
diff --git a/cpp/test/cluster/cluster_solvers.cu b/cpp/test/cluster/cluster_solvers.cu
index 60e5f62dc0..e283f6c3c5 100644
--- a/cpp/test/cluster/cluster_solvers.cu
+++ b/cpp/test/cluster/cluster_solvers.cu
@@ -17,7 +17,9 @@
 #include <gtest/gtest.h>
 #include <iostream>
 #include <memory>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_id.hpp>
+#include <raft/core/resources.hpp>
 
 #include <raft/spectral/cluster_solvers.cuh>
 #include <raft/spectral/modularity_maximization.cuh>
@@ -31,13 +33,13 @@ TEST(Raft, ClusterSolvers)
   using index_type = int;
   using value_type = double;
 
-  raft::device_resources h;
+  raft::resources h;
 
   index_type maxiter{100};
   value_type tol{1.0e-10};
   unsigned long long seed{100110021003};
 
-  auto stream = h.get_stream();
+  auto stream = resource::get_cuda_stream(h);
 
   index_type n{100};
   index_type d{10};
@@ -61,8 +63,8 @@ TEST(Raft, ModularitySolvers)
   using index_type = int;
   using value_type = double;
 
-  raft::device_resources h;
-  ASSERT_EQ(0, h.get_device());
+  raft::resources h;
+  ASSERT_EQ(0, resource::get_device_id(h));
 
   index_type neigvs{10};
   index_type maxiter{100};
@@ -87,7 +89,7 @@ TEST(Raft, ModularitySolvers)
   cluster_solver_config_t<index_type, value_type> clust_cfg{k, maxiter, tol, seed};
   kmeans_solver_t<index_type, value_type> cluster_solver{clust_cfg};
 
-  auto stream = h.get_stream();
+  auto stream = resource::get_cuda_stream(h);
   sparse_matrix_t<index_type, value_type> sm{h, nullptr, nullptr, nullptr, 0, 0};
 
   EXPECT_ANY_THROW(spectral::modularity_maximization(
diff --git a/cpp/test/cluster/cluster_solvers_deprecated.cu b/cpp/test/cluster/cluster_solvers_deprecated.cu
index dbafbd15d6..5af8fda5ff 100644
--- a/cpp/test/cluster/cluster_solvers_deprecated.cu
+++ b/cpp/test/cluster/cluster_solvers_deprecated.cu
@@ -17,7 +17,8 @@
 #include <gtest/gtest.h>
 #include <iostream>
 #include <memory>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 #include <raft/spectral/cluster_solvers_deprecated.cuh>
 
@@ -30,13 +31,13 @@ TEST(Raft, ClusterSolvers)
   using index_type = int;
   using value_type = double;
 
-  raft::device_resources h;
+  raft::resources h;
 
   index_type maxiter{100};
   value_type tol{1.0e-10};
   unsigned long long seed{100110021003};
 
-  auto stream = h.get_stream();
+  auto stream = resource::get_cuda_stream(h);
 
   index_type n{100};
   index_type d{10};
diff --git a/cpp/test/cluster/kmeans.cu b/cpp/test/cluster/kmeans.cu
index 20110eed11..ebbe1c46be 100644
--- a/cpp/test/cluster/kmeans.cu
+++ b/cpp/test/cluster/kmeans.cu
@@ -17,12 +17,13 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <optional>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <vector>
 
 #include <raft/cluster/kmeans.cuh>
 #include <raft/core/cudart_utils.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/make_blobs.cuh>
 #include <raft/stats/adjusted_rand_index.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -41,7 +42,7 @@ struct KmeansInputs {
 };
 
 template <typename DataT, typename IndexT>
-void run_cluster_cost(const raft::device_resources& handle,
+void run_cluster_cost(const raft::resources& handle,
                       raft::device_vector_view<DataT, IndexT> minClusterDistance,
                       rmm::device_uvector<char>& workspace,
                       raft::device_scalar_view<DataT> clusterCost)
@@ -54,10 +55,10 @@ template <typename T>
 class KmeansTest : public ::testing::TestWithParam<KmeansInputs<T>> {
  protected:
   KmeansTest()
-    : d_labels(0, handle.get_stream()),
-      d_labels_ref(0, handle.get_stream()),
-      d_centroids(0, handle.get_stream()),
-      d_sample_weight(0, handle.get_stream())
+    : d_labels(0, resource::get_cuda_stream(handle)),
+      d_labels_ref(0, resource::get_cuda_stream(handle)),
+      d_centroids(0, resource::get_cuda_stream(handle)),
+      d_sample_weight(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -65,7 +66,7 @@ class KmeansTest : public ::testing::TestWithParam<KmeansInputs<T>> {
   {
     testparams = ::testing::TestWithParam<KmeansInputs<T>>::GetParam();
 
-    auto stream                = handle.get_stream();
+    auto stream                = resource::get_cuda_stream(handle);
     int n_samples              = testparams.n_row;
     int n_features             = testparams.n_col;
     params.n_clusters          = testparams.n_clusters;
@@ -240,7 +241,7 @@ class KmeansTest : public ::testing::TestWithParam<KmeansInputs<T>> {
 
     auto X      = raft::make_device_matrix<T, int>(handle, n_samples, n_features);
     auto labels = raft::make_device_vector<int, int>(handle, n_samples);
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
 
     raft::random::make_blobs<T, int>(X.data_handle(),
                                      labels.data_handle(),
@@ -290,10 +291,10 @@ class KmeansTest : public ::testing::TestWithParam<KmeansInputs<T>> {
       raft::make_host_scalar_view<T>(&inertia),
       raft::make_host_scalar_view<int>(&n_iter));
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     score = raft::stats::adjusted_rand_index(
-      d_labels_ref.data(), d_labels.data(), n_samples, handle.get_stream());
+      d_labels_ref.data(), d_labels.data(), n_samples, resource::get_cuda_stream(handle));
 
     if (score < 1.0) {
       std::stringstream ss;
@@ -313,7 +314,7 @@ class KmeansTest : public ::testing::TestWithParam<KmeansInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   KmeansInputs<T> testparams;
   rmm::device_uvector<int> d_labels;
   rmm::device_uvector<int> d_labels_ref;
diff --git a/cpp/test/cluster/kmeans_balanced.cu b/cpp/test/cluster/kmeans_balanced.cu
index a34f2f3b59..5b1d764b28 100644
--- a/cpp/test/cluster/kmeans_balanced.cu
+++ b/cpp/test/cluster/kmeans_balanced.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.h"
 #include <gtest/gtest.h>
 #include <optional>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <vector>
 
 #include <raft/cluster/kmeans_balanced.cuh>
@@ -58,7 +59,7 @@ template <typename DataT, typename MathT, typename LabelT, typename IdxT, typena
 class KmeansBalancedTest : public ::testing::TestWithParam<KmeansBalancedInputs<MathT, IdxT>> {
  protected:
   KmeansBalancedTest()
-    : stream(handle.get_stream()),
+    : stream(resource::get_cuda_stream(handle)),
       d_labels(0, stream),
       d_labels_ref(0, stream),
       d_centroids(0, stream)
@@ -120,10 +121,10 @@ class KmeansBalancedTest : public ::testing::TestWithParam<KmeansBalancedInputs<
     raft::cluster::kmeans_balanced::fit_predict(
       handle, p.kb_params, X_view, d_centroids_view, d_labels_view, op);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     score = raft::stats::adjusted_rand_index(
-      d_labels_ref.data(), d_labels.data(), p.n_rows, handle.get_stream());
+      d_labels_ref.data(), d_labels.data(), p.n_rows, resource::get_cuda_stream(handle));
 
     if (score < 1.0) {
       std::stringstream ss;
diff --git a/cpp/test/cluster/kmeans_find_k.cu b/cpp/test/cluster/kmeans_find_k.cu
index bb41d4fafc..8f017a0231 100644
--- a/cpp/test/cluster/kmeans_find_k.cu
+++ b/cpp/test/cluster/kmeans_find_k.cu
@@ -17,11 +17,12 @@
 #include "../test_utils.h"
 #include <gtest/gtest.h>
 #include <optional>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <vector>
 
 #include <raft/cluster/kmeans.cuh>
 #include <raft/core/cudart_utils.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/make_blobs.cuh>
 #include <raft/util/cuda_utils.cuh>
 
@@ -39,7 +40,10 @@ struct KmeansFindKInputs {
 template <typename T>
 class KmeansFindKTest : public ::testing::TestWithParam<KmeansFindKInputs<T>> {
  protected:
-  KmeansFindKTest() : stream(handle.get_stream()), best_k(raft::make_host_scalar<int>(0)) {}
+  KmeansFindKTest()
+    : stream(resource::get_cuda_stream(handle)), best_k(raft::make_host_scalar<int>(0))
+  {
+  }
 
   void basicTest()
   {
@@ -76,13 +80,13 @@ class KmeansFindKTest : public ::testing::TestWithParam<KmeansFindKInputs<T>> {
     raft::cluster::kmeans::find_k<int, T>(
       handle, X_view, best_k.view(), inertia.view(), n_iter.view(), n_clusters);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void SetUp() override { basicTest(); }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
   KmeansFindKInputs<T> testparams;
   raft::host_scalar<int> best_k;
diff --git a/cpp/test/cluster/linkage.cu b/cpp/test/cluster/linkage.cu
index b2b177dde6..e660dbef13 100644
--- a/cpp/test/cluster/linkage.cu
+++ b/cpp/test/cluster/linkage.cu
@@ -24,6 +24,7 @@
 #undef RAFT_EXPLICIT_INSTANTIATE_ONLY
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/distance/distance_types.hpp>
 #include <raft/linalg/transpose.cuh>
@@ -169,15 +170,15 @@ class LinkageTest : public ::testing::TestWithParam<LinkageInputs<T, IdxT>> {
  public:
   LinkageTest()
     : params(::testing::TestWithParam<LinkageInputs<T, IdxT>>::GetParam()),
-      labels(0, handle.get_stream()),
-      labels_ref(0, handle.get_stream())
+      labels(0, resource::get_cuda_stream(handle)),
+      labels_ref(0, resource::get_cuda_stream(handle))
   {
   }
 
  protected:
   void basicTest()
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
 
     labels.resize(params.n_row, stream);
     labels_ref.resize(params.n_row, stream);
@@ -217,7 +218,7 @@ class LinkageTest : public ::testing::TestWithParam<LinkageInputs<T, IdxT>> {
           std::make_optional<int>(params.c));
     }
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     score = compute_rand_index(labels.data(), labels_ref.data(), params.n_row, stream);
   }
@@ -225,7 +226,7 @@ class LinkageTest : public ::testing::TestWithParam<LinkageInputs<T, IdxT>> {
   void SetUp() override { basicTest(); }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
 
   LinkageInputs<T, IdxT> params;
   rmm::device_uvector<IdxT> labels, labels_ref;
diff --git a/cpp/test/core/handle.cpp b/cpp/test/core/handle.cpp
index fddfd58bb8..8c5e023df3 100644
--- a/cpp/test/core/handle.cpp
+++ b/cpp/test/core/handle.cpp
@@ -21,6 +21,10 @@
 #include <memory>
 #include <raft/core/comms.hpp>
 #include <raft/core/handle.hpp>
+#include <raft/core/resource/comms.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>
 #include <unordered_map>
@@ -186,7 +190,7 @@ TEST(Raft, HandleDefault)
 {
   raft::handle_t h;
   ASSERT_EQ(0, h.get_device());
-  ASSERT_EQ(rmm::cuda_stream_per_thread, h.get_stream());
+  ASSERT_EQ(rmm::cuda_stream_per_thread, resource::get_cuda_stream(h));
   ASSERT_NE(nullptr, h.get_cublas_handle());
   ASSERT_NE(nullptr, h.get_cusolver_dn_handle());
   ASSERT_NE(nullptr, h.get_cusolver_sp_handle());
@@ -206,8 +210,8 @@ TEST(Raft, Handle)
   RAFT_CUDA_TRY(cudaStreamCreate(&stream));
   rmm::cuda_stream_view stream_view(stream);
   raft::handle_t handle(stream_view);
-  ASSERT_EQ(stream_view, handle.get_stream());
-  handle.sync_stream(stream);
+  ASSERT_EQ(stream_view, resource::get_cuda_stream(handle));
+  resource::sync_stream(handle, stream);
   RAFT_CUDA_TRY(cudaStreamDestroy(stream));
 }
 
@@ -217,16 +221,16 @@ TEST(Raft, DefaultConstructor)
 
   // Make sure waiting on the default stream pool
   // does not fail.
-  handle.wait_stream_pool_on_stream();
-  handle.sync_stream_pool();
+  resource::wait_stream_pool_on_stream(handle);
+  resource::sync_stream_pool(handle);
 
-  auto s1 = handle.get_next_usable_stream();
-  auto s2 = handle.get_stream();
-  auto s3 = handle.get_next_usable_stream(5);
+  auto s1 = resource::get_next_usable_stream(handle);
+  auto s2 = resource::get_cuda_stream(handle);
+  auto s3 = resource::get_next_usable_stream(handle, 5);
 
   ASSERT_EQ(s1, s2);
   ASSERT_EQ(s2, s3);
-  ASSERT_EQ(0, handle.get_stream_pool_size());
+  ASSERT_EQ(0, resource::get_stream_pool_size(handle));
 }
 
 TEST(Raft, GetHandleFromPool)
@@ -250,7 +254,7 @@ TEST(Raft, Comms)
   auto comm1 = std::make_shared<comms_t>(std::unique_ptr<comms_iface>(new mock_comms(2)));
   handle.set_comms(comm1);
 
-  ASSERT_EQ(handle.get_comms().get_size(), 2);
+  ASSERT_EQ(resource::get_comms(handle).get_size(), 2);
 }
 
 TEST(Raft, SubComms)
@@ -271,16 +275,16 @@ TEST(Raft, WorkspaceResource)
   raft::handle_t handle;
 
   ASSERT_TRUE(dynamic_cast<const rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>*>(
-                handle.get_workspace_resource()) == nullptr);
-  ASSERT_EQ(rmm::mr::get_current_device_resource(), handle.get_workspace_resource());
+                resource::get_workspace_resource(handle)) == nullptr);
+  ASSERT_EQ(rmm::mr::get_current_device_resource(), resource::get_workspace_resource(handle));
 
   auto pool_mr = new rmm::mr::pool_memory_resource(rmm::mr::get_current_device_resource());
   std::shared_ptr<rmm::cuda_stream_pool> pool = {nullptr};
   raft::handle_t handle2(rmm::cuda_stream_per_thread, pool, pool_mr);
 
   ASSERT_TRUE(dynamic_cast<const rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>*>(
-                handle2.get_workspace_resource()) != nullptr);
-  ASSERT_EQ(pool_mr, handle2.get_workspace_resource());
+                resource::get_workspace_resource(handle2)) != nullptr);
+  ASSERT_EQ(pool_mr, resource::get_workspace_resource(handle2));
 
   delete pool_mr;
 }
@@ -299,10 +303,10 @@ TEST(Raft, WorkspaceResourceCopy)
 
   // Assert the workspace_resources are what we expect
   ASSERT_TRUE(dynamic_cast<const rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>*>(
-                handle.get_workspace_resource()) == nullptr);
+                resource::get_workspace_resource(handle)) == nullptr);
 
   ASSERT_TRUE(dynamic_cast<const rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>*>(
-                copied_handle.get_workspace_resource()) != nullptr);
+                resource::get_workspace_resource(copied_handle)) != nullptr);
 }
 
 TEST(Raft, HandleCopy)
diff --git a/cpp/test/core/mdarray.cu b/cpp/test/core/mdarray.cu
index aab7979c0e..86e51be2e4 100644
--- a/cpp/test/core/mdarray.cu
+++ b/cpp/test/core/mdarray.cu
@@ -15,14 +15,15 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <gtest/gtest.h>
 #include <raft/core/device_container_policy.hpp>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_container_policy.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/cuda_stream.hpp>
@@ -84,7 +85,7 @@ TEST(MDArray, Policy) { test_uvector_policy(); }
 void test_mdarray_basic()
 {
   using matrix_extent = stdex::extents<int, dynamic_extent, dynamic_extent>;
-  raft::device_resources handle;
+  raft::resources handle;
   auto s = resource::get_cuda_stream(handle);
   {
     /**
@@ -190,7 +191,7 @@ TEST(MDArray, Basic) { test_mdarray_basic(); }
 template <typename BasicMDarray, typename PolicyFn, typename ThrustPolicy>
 void test_mdarray_copy_move(ThrustPolicy exec, PolicyFn make_policy)
 {
-  raft::device_resources handle;
+  raft::resources handle;
   using matrix_extent = stdex::extents<size_t, dynamic_extent, dynamic_extent>;
   layout_c_contiguous::mapping<matrix_extent> layout{matrix_extent{4, 4}};
 
@@ -265,7 +266,7 @@ TEST(MDArray, CopyMove)
   using matrix_extent = stdex::extents<size_t, dynamic_extent, dynamic_extent>;
   using d_matrix_t    = device_mdarray<float, matrix_extent>;
   using policy_t      = typename d_matrix_t::container_policy_type;
-  raft::device_resources handle;
+  raft::resources handle;
   auto s = resource::get_cuda_stream(handle);
   test_mdarray_copy_move<d_matrix_t>(rmm::exec_policy(s), []() { return policy_t{}; });
 
@@ -348,7 +349,7 @@ void test_factory_methods()
     ASSERT_EQ(h_vec.extent(0), n);
   }
   {
-    raft::device_resources handle;
+    raft::resources handle;
     // device mdarray
     auto d_matrix = make_device_matrix<float>(handle, n, n);
     ASSERT_EQ(d_matrix.extent(0), n);
@@ -361,7 +362,7 @@ void test_factory_methods()
   }
 
   {
-    raft::device_resources handle;
+    raft::resources handle;
     // device scalar
     auto d_scalar = make_device_scalar<double>(handle, 17.0);
     static_assert(d_scalar.rank() == 1);
@@ -371,17 +372,17 @@ void test_factory_methods()
     auto view = d_scalar.view();
     thrust::device_vector<int32_t> status(1, 0);
     auto p_status = status.data().get();
-    thrust::for_each_n(rmm::exec_policy(handle.get_stream()),
+    thrust::for_each_n(rmm::exec_policy(resource::get_cuda_stream(handle)),
                        thrust::make_counting_iterator(0),
                        1,
                        [=] __device__(auto i) {
                          if (view(i) != 17.0) { myAtomicAdd(p_status, 1); }
                        });
-    check_status(p_status, handle.get_stream());
+    check_status(p_status, resource::get_cuda_stream(handle));
   }
   {
     // host scalar
-    raft::device_resources handle;
+    raft::resources handle;
 
     auto h_scalar = make_host_scalar<double>(handle, 17.0);
     static_assert(h_scalar.rank() == 1);
@@ -395,7 +396,7 @@ void test_factory_methods()
 
   // managed
   {
-    raft::device_resources handle;
+    raft::resources handle;
     auto mda = make_device_vector<int>(handle, 10);
 
     auto mdv = make_managed_mdspan(mda.data_handle(), raft::vector_extent<int>{10});
@@ -426,7 +427,7 @@ void check_matrix_layout(device_matrix_view<T, Index, LayoutPolicy> in)
 
 TEST(MDArray, FuncArg)
 {
-  raft::device_resources handle;
+  raft::resources handle;
   {
     auto d_matrix = make_device_matrix<float>(handle, 10, 10);
     check_matrix_layout(d_matrix.view());
@@ -497,7 +498,7 @@ TEST(MDSpan, LayoutRightPadded) { test_mdspan_layout_right_padded(); }
 void test_mdarray_padding()
 {
   using extents_type = stdex::extents<size_t, dynamic_extent, dynamic_extent>;
-  raft::device_resources handle;
+  raft::resources handle;
   auto s = resource::get_cuda_stream(handle);
   {
     constexpr int rows            = 6;
@@ -741,7 +742,7 @@ struct TestElement1 {
 void test_mdspan_padding_by_type()
 {
   using extents_type = stdex::extents<size_t, dynamic_extent, dynamic_extent>;
-  raft::device_resources handle;
+  raft::resources handle;
   auto s = rmm::cuda_stream_default;
 
   {
@@ -814,7 +815,7 @@ TEST(MDSpan, MDSpanPaddingType) { test_mdspan_padding_by_type(); }
 void test_mdspan_aligned_matrix()
 {
   using extents_type = stdex::extents<size_t, dynamic_extent, dynamic_extent>;
-  raft::device_resources handle;
+  raft::resources handle;
   constexpr int rows = 2;
   constexpr int cols = 10;
 
@@ -930,10 +931,10 @@ void test_mdarray_unravel()
   }
 
   {
-    raft::device_resources handle;
+    raft::resources handle;
     auto m   = make_device_matrix<float, size_t>(handle, 7, 6);
     auto m_v = m.view();
-    thrust::for_each_n(handle.get_thrust_policy(),
+    thrust::for_each_n(resource::get_thrust_policy(handle),
                        thrust::make_counting_iterator(0ul),
                        m_v.size(),
                        [=] HD(size_t i) {
@@ -943,7 +944,7 @@ void test_mdarray_unravel()
                        });
     thrust::device_vector<int32_t> status(1, 0);
     auto p_status = status.data().get();
-    thrust::for_each_n(handle.get_thrust_policy(),
+    thrust::for_each_n(resource::get_thrust_policy(handle),
                        thrust::make_counting_iterator(0ul),
                        m_v.size(),
                        [=] __device__(size_t i) {
@@ -952,7 +953,7 @@ void test_mdarray_unravel()
                          auto v = std::apply(m_v, coord);
                          if (v != static_cast<float>(i)) { raft::myAtomicAdd(p_status, 1); }
                        });
-    check_status(p_status, handle.get_stream());
+    check_status(p_status, resource::get_cuda_stream(handle));
   }
 }
 }  // anonymous namespace
diff --git a/cpp/test/core/mdspan_utils.cu b/cpp/test/core/mdspan_utils.cu
index 5e479b839f..ad212569c2 100644
--- a/cpp/test/core/mdspan_utils.cu
+++ b/cpp/test/core/mdspan_utils.cu
@@ -17,9 +17,9 @@
 #include <gtest/gtest.h>
 #include <raft/core/device_container_policy.hpp>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_container_policy.hpp>
 #include <raft/core/host_mdarray.hpp>
+#include <raft/core/resources.hpp>
 
 namespace raft {
 
@@ -78,7 +78,7 @@ TEST(MDSpan, TemplateAsserts) { test_template_asserts(); }
 
 void test_host_flatten()
 {
-  raft::device_resources handle;
+  raft::resources handle;
   // flatten 3d host mdspan
   {
     using three_d_extents = extents<int, dynamic_extent, dynamic_extent, dynamic_extent>;
@@ -121,10 +121,10 @@ TEST(MDArray, HostFlatten) { test_host_flatten(); }
 
 void test_device_flatten()
 {
-  raft::device_resources handle;
+  raft::resources handle;
   // flatten 3d device mdspan
   {
-    raft::device_resources handle;
+    raft::resources handle;
     using three_d_extents = extents<int, dynamic_extent, dynamic_extent, dynamic_extent>;
     using three_d_mdarray = device_mdarray<int, three_d_extents>;
 
@@ -165,7 +165,7 @@ TEST(MDArray, DeviceFlatten) { test_device_flatten(); }
 
 void test_reshape()
 {
-  raft::device_resources handle;
+  raft::resources handle;
   // reshape 3d host array to vector
   {
     using three_d_extents = extents<int, dynamic_extent, dynamic_extent, dynamic_extent>;
@@ -184,7 +184,7 @@ void test_reshape()
 
   // reshape 4d device array to 2d
   {
-    raft::device_resources handle;
+    raft::resources handle;
     using four_d_extents =
       extents<int, dynamic_extent, dynamic_extent, dynamic_extent, dynamic_extent>;
     using four_d_mdarray = device_mdarray<int, four_d_extents>;
@@ -223,7 +223,7 @@ void test_const_mdspan()
 {
   // 3d host array
   {
-    raft::device_resources handle;
+    raft::resources handle;
     using two_d_extents = extents<int, 5, 5>;
     using two_d_mdarray = host_mdarray<float, two_d_extents>;
 
diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/test/core/numpy_serializer.cu
index 4131a33171..0d12b97555 100644
--- a/cpp/test/core/numpy_serializer.cu
+++ b/cpp/test/core/numpy_serializer.cu
@@ -16,8 +16,8 @@
 
 #include <gtest/gtest.h>
 
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdarray.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/core/serialize.hpp>
 
 #include <thrust/device_vector.h>
@@ -40,7 +40,7 @@ using dextents = std::experimental::dextents<IndexType, Rank>;
 namespace raft {
 
 template <typename MDSpanType, typename VectorType, typename... Args>
-void test_mdspan_roundtrip(const raft::device_resources& handle, VectorType& vec, Args... dims)
+void test_mdspan_roundtrip(const raft::resources& handle, VectorType& vec, Args... dims)
 {
   VectorType vec2(vec.size());
 
@@ -57,7 +57,7 @@ void test_mdspan_roundtrip(const raft::device_resources& handle, VectorType& vec
 template <typename T>
 void run_roundtrip_test_mdspan_serializer()
 {
-  raft::device_resources handle{};
+  raft::resources handle{};
   thrust::host_vector<T> vec = std::vector<T>{1, 2, 3, 4, 5, 6, 7, 8};
 
   using mdspan_matrix2d_c_layout =
@@ -110,7 +110,7 @@ TEST(NumPySerializerMDSpan, HeaderRoundTrip)
 
 TEST(NumPySerializerMDSpan, ManagedMDSpan)
 {
-  raft::device_resources handle{};
+  raft::resources handle{};
   thrust::universal_vector<float> vec = std::vector<float>{1, 2, 3, 4, 5, 6, 7, 8};
   using managed_mdspan_matrix2d_c_layout =
     raft::managed_mdspan<float, dextents<std::size_t, 3>, raft::layout_c_contiguous>;
diff --git a/cpp/test/core/sparse_matrix.cu b/cpp/test/core/sparse_matrix.cu
index 4feb171e21..3cf0bc98d8 100644
--- a/cpp/test/core/sparse_matrix.cu
+++ b/cpp/test/core/sparse_matrix.cu
@@ -16,7 +16,7 @@
 #include <gtest/gtest.h>
 #include <raft/core/device_coo_matrix.hpp>
 #include <raft/core/device_csr_matrix.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <type_traits>
 
 namespace raft {
@@ -77,7 +77,7 @@ void test_device_coo_sparsity_preserving_ref(S& mat, void* d)
 
 void test_device_coo_matrix()
 {
-  raft::device_resources handle;
+  raft::resources handle;
   auto sparsity_owning = raft::make_device_coo_matrix<float, int, int, int>(handle, 5, 5);
 
   auto structure_view = sparsity_owning.structure_view();
@@ -110,7 +110,7 @@ void test_device_coo_matrix()
 
 void test_device_csr_matrix()
 {
-  raft::device_resources handle;
+  raft::resources handle;
   auto sparsity_owning = raft::make_device_csr_matrix<float, int, int, int>(handle, 5, 5);
 
   auto comp_struct = raft::make_device_compressed_structure(handle, 5, 5, 5);
diff --git a/cpp/test/core/temporary_device_buffer.cu b/cpp/test/core/temporary_device_buffer.cu
index cc8af24f10..f61fa826a9 100644
--- a/cpp/test/core/temporary_device_buffer.cu
+++ b/cpp/test/core/temporary_device_buffer.cu
@@ -15,9 +15,10 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
-#include <raft/core/device_resources.hpp>
 #include <raft/core/host_mdarray.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/core/temporary_device_buffer.hpp>
 
 #include <rmm/device_uvector.hpp>
@@ -29,7 +30,7 @@ namespace raft {
 TEST(TemporaryDeviceBuffer, DevicePointer)
 {
   {
-    raft::device_resources handle;
+    raft::resources handle;
     auto exts  = raft::make_extents<int>(5);
     auto array = raft::make_device_mdarray<int, int>(handle, exts);
 
@@ -41,7 +42,7 @@ TEST(TemporaryDeviceBuffer, DevicePointer)
   }
 
   {
-    raft::device_resources handle;
+    raft::resources handle;
     auto exts  = raft::make_extents<int>(5);
     auto array = raft::make_device_mdarray<int, int>(handle, exts);
 
@@ -55,21 +56,22 @@ TEST(TemporaryDeviceBuffer, DevicePointer)
 
 TEST(TemporaryDeviceBuffer, HostPointerWithWriteBack)
 {
-  raft::device_resources handle;
+  raft::resources handle;
   auto exts  = raft::make_extents<int>(5);
   auto array = raft::make_host_mdarray<int, int>(exts);
   thrust::fill(array.data_handle(), array.data_handle() + array.extent(0), 1);
-  rmm::device_uvector<int> result(5, handle.get_stream());
+  rmm::device_uvector<int> result(5, resource::get_cuda_stream(handle));
 
   {
     auto d_buf  = raft::make_writeback_temporary_device_buffer(handle, array.data_handle(), exts);
     auto d_view = d_buf.view();
 
-    thrust::fill(rmm::exec_policy(handle.get_stream()),
+    thrust::fill(rmm::exec_policy(resource::get_cuda_stream(handle)),
                  d_view.data_handle(),
                  d_view.data_handle() + d_view.extent(0),
                  10);
-    raft::copy(result.data(), d_view.data_handle(), d_view.extent(0), handle.get_stream());
+    raft::copy(
+      result.data(), d_view.data_handle(), d_view.extent(0), resource::get_cuda_stream(handle));
 
     static_assert(!std::is_const_v<typename decltype(d_buf.view())::element_type>,
                   "element_type should not be const");
@@ -79,7 +81,7 @@ TEST(TemporaryDeviceBuffer, HostPointerWithWriteBack)
                                     result.data(),
                                     array.extent(0),
                                     raft::Compare<int>(),
-                                    handle.get_stream()));
+                                    resource::get_cuda_stream(handle)));
 }
 
 }  // namespace raft
diff --git a/cpp/test/distance/dist_adj.cu b/cpp/test/distance/dist_adj.cu
index 413e548532..ff26744185 100644
--- a/cpp/test/distance/dist_adj.cu
+++ b/cpp/test/distance/dist_adj.cu
@@ -16,7 +16,8 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -88,7 +89,7 @@ class DistanceAdjTest : public ::testing::TestWithParam<DistanceAdjInputs<DataTy
  public:
   DistanceAdjTest()
     : params(::testing::TestWithParam<DistanceAdjInputs<DataType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       dist(params.m * params.n, stream),
       dist_ref(params.m * params.n, stream)
   {
@@ -134,7 +135,7 @@ class DistanceAdjTest : public ::testing::TestWithParam<DistanceAdjInputs<DataTy
                                                   worksize,
                                                   threshold_op,
                                                   isRowMajor);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void TearDown() override {}
@@ -147,7 +148,7 @@ class DistanceAdjTest : public ::testing::TestWithParam<DistanceAdjInputs<DataTy
   // memory consumption if we use uint8_t instead of bool.
   rmm::device_uvector<uint8_t> dist_ref;
   rmm::device_uvector<uint8_t> dist;
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 };
 
diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh
index 60951daeb7..6c7cab3f7b 100644
--- a/cpp/test/distance/distance_base.cuh
+++ b/cpp/test/distance/distance_base.cuh
@@ -16,11 +16,12 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
-#include <raft/common/nvtx.hpp>              // common::nvtx::range
+#include <raft/common/nvtx.hpp>  // common::nvtx::range
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>       // make_device_matrix_view
-#include <raft/core/device_resources.hpp>    // raft::device_resources
 #include <raft/core/operators.hpp>           // raft::sqrt
+#include <raft/core/resources.hpp>           // raft::resources
 #include <raft/distance/distance.cuh>
 #include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
 #include <raft/random/rng.cuh>
@@ -428,7 +429,7 @@ constexpr bool layout_to_row_major<layout_f_contiguous>()
 }
 
 template <raft::distance::DistanceType distanceType, typename DataType, typename layout>
-void distanceLauncher(raft::device_resources const& handle,
+void distanceLauncher(raft::resources const& handle,
                       DataType* x,
                       DataType* y,
                       DataType* dist,
@@ -453,7 +454,7 @@ class DistanceTest : public ::testing::TestWithParam<DistanceInputs<DataType>> {
  public:
   DistanceTest()
     : params(::testing::TestWithParam<DistanceInputs<DataType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       x(params.m * params.k, stream),
       y(params.n * params.k, stream),
       dist_ref(params.m * params.n, stream),
@@ -520,11 +521,11 @@ class DistanceTest : public ::testing::TestWithParam<DistanceInputs<DataType>> {
                                                                     threshold,
                                                                     metric_arg);
     }
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   DistanceInputs<DataType> params;
@@ -535,13 +536,14 @@ template <raft::distance::DistanceType distanceType>
 class BigMatrixDistanceTest : public ::testing::Test {
  public:
   BigMatrixDistanceTest()
-    : x(m * k, handle.get_stream()), dist(std::size_t(m) * m, handle.get_stream()){};
+    : x(m * k, resource::get_cuda_stream(handle)),
+      dist(std::size_t(m) * m, resource::get_cuda_stream(handle)){};
   void SetUp() override
   {
     auto testInfo = testing::UnitTest::GetInstance()->current_test_info();
     common::nvtx::range fun_scope("test::%s/%s", testInfo->test_suite_name(), testInfo->name());
 
-    void pairwise_distance(raft::device_resources const& handle,
+    void pairwise_distance(raft::resources const& handle,
                            float* x,
                            float* y,
                            float* dists,
@@ -555,11 +557,11 @@ class BigMatrixDistanceTest : public ::testing::Test {
     constexpr float metric_arg = 0.0f;
     raft::distance::distance<distanceType, float, float, float>(
       handle, x.data(), x.data(), dist.data(), m, n, k, row_major, metric_arg);
-    RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+    RAFT_CUDA_TRY(cudaStreamSynchronize(resource::get_cuda_stream(handle)));
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   int m = 48000;
   int n = 48000;
   int k = 1;
diff --git a/cpp/test/distance/fused_l2_nn.cu b/cpp/test/distance/fused_l2_nn.cu
index c4ccd55f69..e3f3bf3324 100644
--- a/cpp/test/distance/fused_l2_nn.cu
+++ b/cpp/test/distance/fused_l2_nn.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/kvp.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/detail/fused_l2_nn.cuh>
 #include <raft/distance/fused_l2_nn.cuh>
 #include <raft/linalg/norm.cuh>
@@ -127,7 +128,7 @@ class FusedL2NNTest : public ::testing::TestWithParam<Inputs<DataT>> {
  public:
   FusedL2NNTest()
     : params(::testing::TestWithParam<Inputs<DataT>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       x(params.m * params.k, stream),
       y(params.n * params.k, stream),
       xn(params.m, stream),
@@ -154,7 +155,7 @@ class FusedL2NNTest : public ::testing::TestWithParam<Inputs<DataT>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
   Inputs<DataT> params;
   rmm::device_uvector<DataT> x;
@@ -361,7 +362,7 @@ INSTANTIATE_TEST_CASE_P(FusedL2NNTests, FusedL2NNTestD_Sqrt, ::testing::ValuesIn
 template <typename DataT, bool Sqrt>
 class FusedL2NNDetTest : public FusedL2NNTest<DataT, Sqrt> {
  public:
-  FusedL2NNDetTest() : stream(handle.get_stream()), min1(0, stream) {}
+  FusedL2NNDetTest() : stream(resource::get_cuda_stream(handle)), min1(0, stream) {}
 
   void SetUp() override
   {
@@ -374,7 +375,7 @@ class FusedL2NNDetTest : public FusedL2NNTest<DataT, Sqrt> {
   void TearDown() override { FusedL2NNTest<DataT, Sqrt>::TearDown(); }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   rmm::device_uvector<raft::KeyValuePair<int, DataT>> min1;
diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu
index 797e31c85d..b3640a888a 100644
--- a/cpp/test/distance/gram.cu
+++ b/cpp/test/distance/gram.cu
@@ -145,7 +145,7 @@ class GramMatrixTest : public ::testing::TestWithParam<GramMatrixInputs> {
       gram_host.data(), gram.data(), gram.size(), raft::CompareApprox<math_t>(1e-6f)));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
   GramMatrixInputs params;
 
diff --git a/cpp/test/distance/gram_base.cuh b/cpp/test/distance/gram_base.cuh
index 8c0652bc16..1862d1a540 100644
--- a/cpp/test/distance/gram_base.cuh
+++ b/cpp/test/distance/gram_base.cuh
@@ -16,6 +16,7 @@
 
 #include <iostream>
 #include <memory>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/distance/kernels.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -46,13 +47,13 @@ void naiveGramMatrixKernel(int n1,
                            bool is_row_major,
                            KernelParams kernel,
                            cudaStream_t stream,
-                           const raft::device_resources& handle)
+                           const raft::resources& handle)
 {
   std::vector<math_t> x1_host(x1.size());
   raft::update_host(x1_host.data(), x1.data(), x1.size(), stream);
   std::vector<math_t> x2_host(x2.size());
   raft::update_host(x2_host.data(), x2.data(), x2.size(), stream);
-  handle.sync_stream(stream);
+  resource::sync_stream(handle, stream);
 
   for (int i = 0; i < n1; i++) {
     for (int j = 0; j < n2; j++) {
diff --git a/cpp/test/distance/masked_nn.cu b/cpp/test/distance/masked_nn.cu
index 66d5a77dbf..00653f4ced 100644
--- a/cpp/test/distance/masked_nn.cu
+++ b/cpp/test/distance/masked_nn.cu
@@ -20,6 +20,7 @@
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/kvp.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/detail/masked_nn.cuh>
 #include <raft/distance/masked_nn.cuh>
 #include <raft/linalg/norm.cuh>
@@ -186,7 +187,7 @@ struct Inputs {
     // Initialize adj, group_idxs.
     dim3 block(32, 32);
     dim3 grid(10, 10);
-    init_adj<<<grid, block, 0, handle.get_stream()>>>(
+    init_adj<<<grid, block, 0, resource::get_cuda_stream(handle)>>>(
       p.pattern, p.n, adj.view(), group_idxs.view());
     RAFT_CUDA_TRY(cudaGetLastError());
   }
@@ -206,7 +207,7 @@ auto reference(const raft::handle_t& handle, Inputs<DataT> inp, const Params& p)
   }
 
   // Initialize workspace
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   rmm::device_uvector<char> workspace(p.m * sizeof(int), stream);
   RAFT_CUDA_TRY(cudaMemsetAsync(workspace.data(), 0, sizeof(int) * m, stream));
 
@@ -282,7 +283,7 @@ auto run_masked_nn(const raft::handle_t& handle, Inputs<DataT> inp, const Params
                                                   inp.group_idxs.view(),
                                                   out.view());
 
-  handle.sync_stream();
+  resource::sync_stream(handle);
 
   return out;
 }
@@ -383,7 +384,7 @@ TEST_P(MaskedL2NNTest, ReferenceCheckFloat)
                           out_fast.data_handle(),
                           p.m,
                           CompareApproxAbsKVP<DataT>(p.tolerance),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 // This test checks whether running the masked_l2_nn twice returns the same
@@ -406,7 +407,7 @@ TEST_P(MaskedL2NNTest, DeterminismCheck)
                           out2.data_handle(),
                           p.m,
                           CompareApproxAbsKVP<DataT>(p.tolerance),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 TEST_P(MaskedL2NNTest, ReferenceCheckDouble)
@@ -427,7 +428,7 @@ TEST_P(MaskedL2NNTest, ReferenceCheckDouble)
                           out_fast.data_handle(),
                           p.m,
                           CompareApproxAbsKVP<DataT>(p.tolerance),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 INSTANTIATE_TEST_CASE_P(MaskedL2NNTests, MaskedL2NNTest, ::testing::ValuesIn(gen_params()));
diff --git a/cpp/test/distance/masked_nn_compress_to_bits.cu b/cpp/test/distance/masked_nn_compress_to_bits.cu
index e7d75780be..9474244a8f 100644
--- a/cpp/test/distance/masked_nn_compress_to_bits.cu
+++ b/cpp/test/distance/masked_nn_compress_to_bits.cu
@@ -22,6 +22,7 @@
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/detail/compress_to_bits.cuh>
 #include <raft/matrix/init.cuh>
 #include <raft/random/rng.cuh>
@@ -85,7 +86,7 @@ __global__ void decompress_bits_kernel(const T* in, int in_rows, int in_cols, bo
 template <typename T = uint64_t, typename = std::enable_if_t<std::is_integral<T>::value>>
 void decompress_bits(const raft::handle_t& handle, const T* in, int in_rows, int in_cols, bool* out)
 {
-  auto stream = handle.get_stream();
+  auto stream = resource::get_cuda_stream(handle);
   dim3 grid(raft::ceildiv(in_cols, 32), raft::ceildiv(in_rows, 32));
   dim3 block(32, 32);
   decompress_bits_kernel<<<grid, block, 0, stream>>>(in, in_rows, in_cols, out);
@@ -129,18 +130,18 @@ void check_invertible(const Params& p)
   auto tmp = raft::make_device_matrix<T, int>(handle, tmp_m, n);
   auto out = raft::make_device_matrix<bool, int>(handle, out_m, n);
 
-  handle.sync_stream();
+  resource::sync_stream(handle);
   RAFT_CUDA_TRY(cudaGetLastError());
 
   ASSERT_EQ(in.extent(0), out.extent(0)) << "M does not match";
   ASSERT_EQ(in.extent(1), out.extent(1)) << "N does not match";
 
   compress_to_bits(handle, in.view(), tmp.view());
-  handle.sync_stream();
+  resource::sync_stream(handle);
   RAFT_CUDA_TRY(cudaGetLastError());
 
   decompress_bits(handle, tmp.data_handle(), tmp.extent(0), tmp.extent(1), out.data_handle());
-  handle.sync_stream();
+  resource::sync_stream(handle);
   RAFT_CUDA_TRY(cudaGetLastError());
 
   // Check for differences.
@@ -148,8 +149,8 @@ void check_invertible(const Params& p)
                                 out.data_handle(),
                                 in.extent(0) * in.extent(1),
                                 raft::Compare<bool>(),
-                                handle.get_stream()));
-  handle.sync_stream();
+                                resource::get_cuda_stream(handle)));
+  resource::sync_stream(handle);
   RAFT_CUDA_TRY(cudaGetLastError());
 }
 
@@ -170,11 +171,11 @@ void check_all_true(const Params& p)
 
   int tmp_m = raft::ceildiv(m, bits_per_elem);
   auto tmp  = raft::make_device_matrix<T, int>(handle, tmp_m, n);
-  handle.sync_stream();
+  resource::sync_stream(handle);
   RAFT_CUDA_TRY(cudaGetLastError());
 
   compress_to_bits(handle, in.view(), tmp.view());
-  handle.sync_stream();
+  resource::sync_stream(handle);
   RAFT_CUDA_TRY(cudaGetLastError());
 
   auto expected = raft::make_device_matrix<T, int>(handle, tmp_m, n);
@@ -185,8 +186,8 @@ void check_all_true(const Params& p)
                                 tmp.data_handle(),
                                 tmp.extent(0) * tmp.extent(1),
                                 raft::Compare<T>(),
-                                handle.get_stream()));
-  handle.sync_stream();
+                                resource::get_cuda_stream(handle)));
+  resource::sync_stream(handle);
   RAFT_CUDA_TRY(cudaGetLastError());
 }
 
diff --git a/cpp/test/label/merge_labels.cu b/cpp/test/label/merge_labels.cu
index 5107015652..022581c655 100644
--- a/cpp/test/label/merge_labels.cu
+++ b/cpp/test/label/merge_labels.cu
@@ -15,10 +15,11 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/label/merge_labels.cuh>
 
 #include "../test_utils.cuh"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
@@ -43,7 +44,7 @@ class MergeLabelsTest : public ::testing::TestWithParam<MergeLabelsInputs<Index_
  protected:
   MergeLabelsTest()
     : params(::testing::TestWithParam<MergeLabelsInputs<Index_>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       labels_a(params.N, stream),
       labels_b(params.N, stream),
       expected(params.N, stream),
@@ -69,7 +70,7 @@ class MergeLabelsTest : public ::testing::TestWithParam<MergeLabelsInputs<Index_
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MergeLabelsInputs<Index_> params;
diff --git a/cpp/test/lap/lap.cu b/cpp/test/lap/lap.cu
index f26e41456f..7576465633 100644
--- a/cpp/test/lap/lap.cu
+++ b/cpp/test/lap/lap.cu
@@ -23,6 +23,7 @@
  *
  */
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <rmm/device_uvector.hpp>
 
@@ -66,7 +67,7 @@ void hungarian_test(int problemsize,
                     weight_t epsilon,
                     bool verbose = false)
 {
-  raft::device_resources handle;
+  raft::resources handle;
 
   weight_t* h_cost = new weight_t[batchsize * problemsize * problemsize];
 
@@ -74,12 +75,16 @@ void hungarian_test(int problemsize,
     generateProblem(h_cost, batchsize, problemsize, costrange);
 
     rmm::device_uvector<weight_t> elements_v(batchsize * problemsize * problemsize,
-                                             handle.get_stream());
-    rmm::device_uvector<vertex_t> row_assignment_v(batchsize * problemsize, handle.get_stream());
-    rmm::device_uvector<vertex_t> col_assignment_v(batchsize * problemsize, handle.get_stream());
-
-    raft::update_device(
-      elements_v.data(), h_cost, batchsize * problemsize * problemsize, handle.get_stream());
+                                             resource::get_cuda_stream(handle));
+    rmm::device_uvector<vertex_t> row_assignment_v(batchsize * problemsize,
+                                                   resource::get_cuda_stream(handle));
+    rmm::device_uvector<vertex_t> col_assignment_v(batchsize * problemsize,
+                                                   resource::get_cuda_stream(handle));
+
+    raft::update_device(elements_v.data(),
+                        h_cost,
+                        batchsize * problemsize * problemsize,
+                        resource::get_cuda_stream(handle));
 
     for (int i = 0; i < repetitions; i++) {
       float start = omp_get_wtime();
diff --git a/cpp/test/linalg/add.cu b/cpp/test/linalg/add.cu
index 3836f714cb..668f4c0691 100644
--- a/cpp/test/linalg/add.cu
+++ b/cpp/test/linalg/add.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include "add.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/add.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -29,7 +30,7 @@ class AddTest : public ::testing::TestWithParam<AddInputs<InT, OutT>> {
  public:
   AddTest()
     : params(::testing::TestWithParam<AddInputs<InT, OutT>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in1(params.len, stream),
       in2(params.len, stream),
       out_ref(params.len, stream),
@@ -52,7 +53,7 @@ class AddTest : public ::testing::TestWithParam<AddInputs<InT, OutT>> {
     auto in2_view = raft::make_device_vector_view<const InT>(in2.data(), in2.size());
 
     add(handle, in1_view, in2_view, out_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void compare()
@@ -62,7 +63,7 @@ class AddTest : public ::testing::TestWithParam<AddInputs<InT, OutT>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   AddInputs<InT, OutT> params;
diff --git a/cpp/test/linalg/axpy.cu b/cpp/test/linalg/axpy.cu
index 5fd7676792..887e31bb18 100644
--- a/cpp/test/linalg/axpy.cu
+++ b/cpp/test/linalg/axpy.cu
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/axpy.cuh>
 
 #include "../test_utils.cuh"
@@ -45,7 +46,7 @@ struct AxpyInputs {
 template <typename T, typename IndexType = int>
 class AxpyTest : public ::testing::TestWithParam<AxpyInputs<T>> {
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   AxpyInputs<T, IndexType> params;
   rmm::device_uvector<T> refy;
   rmm::device_uvector<T> y_device_alpha;
@@ -54,11 +55,11 @@ class AxpyTest : public ::testing::TestWithParam<AxpyInputs<T>> {
  public:
   AxpyTest()
     : testing::TestWithParam<AxpyInputs<T>>(),
-      refy(0, handle.get_stream()),
-      y_host_alpha(0, handle.get_stream()),
-      y_device_alpha(0, handle.get_stream())
+      refy(0, resource::get_cuda_stream(handle)),
+      y_host_alpha(0, resource::get_cuda_stream(handle)),
+      y_device_alpha(0, resource::get_cuda_stream(handle))
   {
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
@@ -66,7 +67,7 @@ class AxpyTest : public ::testing::TestWithParam<AxpyInputs<T>> {
   {
     params = ::testing::TestWithParam<AxpyInputs<T>>::GetParam();
 
-    cudaStream_t stream = handle.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     raft::random::RngState r(params.seed);
 
@@ -145,7 +146,7 @@ class AxpyTest : public ::testing::TestWithParam<AxpyInputs<T>> {
            make_device_vector_view<T>(y_device_alpha.data(), params.len));
     }
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
   void TearDown() override {}
diff --git a/cpp/test/linalg/binary_op.cu b/cpp/test/linalg/binary_op.cu
index 9936e665ba..df8a38bf4e 100644
--- a/cpp/test/linalg/binary_op.cu
+++ b/cpp/test/linalg/binary_op.cu
@@ -18,6 +18,7 @@
 #include "binary_op.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/binary_op.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -30,11 +31,8 @@ namespace linalg {
 // for an extended __device__ lambda cannot have private or protected access
 // within its class
 template <typename InType, typename IdxType, typename OutType>
-void binaryOpLaunch(const raft::device_resources& handle,
-                    OutType* out,
-                    const InType* in1,
-                    const InType* in2,
-                    IdxType len)
+void binaryOpLaunch(
+  const raft::resources& handle, OutType* out, const InType* in1, const InType* in2, IdxType len)
 {
   auto out_view = raft::make_device_vector_view(out, len);
   auto in1_view = raft::make_device_vector_view(in1, len);
@@ -48,7 +46,7 @@ class BinaryOpTest : public ::testing::TestWithParam<BinaryOpInputs<InType, IdxT
  public:
   BinaryOpTest()
     : params(::testing::TestWithParam<BinaryOpInputs<InType, IdxType, OutType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in1(params.len, stream),
       in2(params.len, stream),
       out_ref(params.len, stream),
@@ -65,11 +63,11 @@ class BinaryOpTest : public ::testing::TestWithParam<BinaryOpInputs<InType, IdxT
     uniform(handle, r, in2.data(), len, InType(-1.0), InType(1.0));
     naiveAdd(out_ref.data(), in1.data(), in2.data(), len);
     binaryOpLaunch(handle, out.data(), in1.data(), in2.data(), len);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   BinaryOpInputs<InType, IdxType, OutType> params;
@@ -132,7 +130,7 @@ class BinaryOpAlignment : public ::testing::Test {
  public:
   void Misaligned()
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     // Test to trigger cudaErrorMisalignedAddress if veclen is incorrectly
     // chosen.
     int n = 1024;
@@ -141,11 +139,15 @@ class BinaryOpAlignment : public ::testing::Test {
     rmm::device_uvector<math_t> z(n, stream);
     RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, n * sizeof(math_t), stream));
     RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, n * sizeof(math_t), stream));
-    raft::linalg::binaryOp(
-      z.data() + 9, x.data() + 137, y.data() + 19, 256, raft::add_op{}, handle.get_stream());
+    raft::linalg::binaryOp(z.data() + 9,
+                           x.data() + 137,
+                           y.data() + 19,
+                           256,
+                           raft::add_op{},
+                           resource::get_cuda_stream(handle));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
 };
 typedef ::testing::Types<float, double> FloatTypes;
 TYPED_TEST_CASE(BinaryOpAlignment, FloatTypes);
diff --git a/cpp/test/linalg/cholesky_r1.cu b/cpp/test/linalg/cholesky_r1.cu
index fba885957f..961ab8a7a2 100644
--- a/cpp/test/linalg/cholesky_r1.cu
+++ b/cpp/test/linalg/cholesky_r1.cu
@@ -15,7 +15,9 @@
  */
 
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusolver_dn_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/cholesky_r1_update.cuh>
 #include <raft/linalg/detail/cusolver_wrappers.hpp>
 #include <raft/util/cudart_utils.hpp>
@@ -32,16 +34,16 @@ template <typename math_t>
 class CholeskyR1Test : public ::testing::Test {
  protected:
   CholeskyR1Test()
-    : G(n_rows * n_rows, handle.get_stream()),
-      L(n_rows * n_rows, handle.get_stream()),
-      L_exp(n_rows * n_rows, handle.get_stream()),
-      devInfo(handle.get_stream()),
-      workspace(0, handle.get_stream())
+    : G(n_rows * n_rows, resource::get_cuda_stream(handle)),
+      L(n_rows * n_rows, resource::get_cuda_stream(handle)),
+      L_exp(n_rows * n_rows, resource::get_cuda_stream(handle)),
+      devInfo(resource::get_cuda_stream(handle)),
+      workspace(0, resource::get_cuda_stream(handle))
   {
-    raft::update_device(G.data(), G_host, n_rows * n_rows, handle.get_stream());
+    raft::update_device(G.data(), G_host, n_rows * n_rows, resource::get_cuda_stream(handle));
 
     // Allocate workspace
-    solver_handle = handle.get_cusolver_dn_handle();
+    solver_handle = resource::get_cusolver_dn_handle(handle);
     // TODO: Call from public API when ready
     RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnpotrf_bufferSize(
       solver_handle, CUBLAS_FILL_MODE_LOWER, n_rows, L.data(), n_rows, &Lwork));
@@ -55,9 +57,9 @@ class CholeskyR1Test : public ::testing::Test {
                                       nullptr,
                                       &n_bytes,
                                       CUBLAS_FILL_MODE_LOWER,
-                                      handle.get_stream());
+                                      resource::get_cuda_stream(handle));
     Lwork = std::max(Lwork * sizeof(math_t), (size_t)n_bytes);
-    workspace.resize(Lwork, handle.get_stream());
+    workspace.resize(Lwork, resource::get_cuda_stream(handle));
   }
 
   void testR1Update()
@@ -65,14 +67,14 @@ class CholeskyR1Test : public ::testing::Test {
     int n = n_rows * n_rows;
     std::vector<cublasFillMode_t> fillmode{CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER};
     for (auto uplo : fillmode) {
-      raft::copy(L.data(), G.data(), n, handle.get_stream());
+      raft::copy(L.data(), G.data(), n, resource::get_cuda_stream(handle));
       for (int rank = 1; rank <= n_rows; rank++) {
         std::stringstream ss;
         ss << "Rank " << rank << ((uplo == CUBLAS_FILL_MODE_LOWER) ? ", lower" : ", upper");
         SCOPED_TRACE(ss.str());
 
         // Expected solution using Cholesky factorization from scratch
-        raft::copy(L_exp.data(), G.data(), n, handle.get_stream());
+        raft::copy(L_exp.data(), G.data(), n, resource::get_cuda_stream(handle));
         // TODO: Call from public API when ready
         RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnpotrf(solver_handle,
                                                                 uplo,
@@ -82,40 +84,59 @@ class CholeskyR1Test : public ::testing::Test {
                                                                 (math_t*)workspace.data(),
                                                                 Lwork,
                                                                 devInfo.data(),
-                                                                handle.get_stream()));
+                                                                resource::get_cuda_stream(handle)));
 
         // Incremental Cholesky factorization using rank one updates.
-        raft::linalg::choleskyRank1Update(
-          handle, L.data(), rank, n_rows, workspace.data(), &Lwork, uplo, handle.get_stream());
+        raft::linalg::choleskyRank1Update(handle,
+                                          L.data(),
+                                          rank,
+                                          n_rows,
+                                          workspace.data(),
+                                          &Lwork,
+                                          uplo,
+                                          resource::get_cuda_stream(handle));
 
         ASSERT_TRUE(raft::devArrMatch(L_exp.data(),
                                       L.data(),
                                       n_rows * rank,
                                       raft::CompareApprox<math_t>(3e-3),
-                                      handle.get_stream()));
+                                      resource::get_cuda_stream(handle)));
       }
     }
   }
 
   void testR1Error()
   {
-    raft::update_device(G.data(), G2_host, 4, handle.get_stream());
+    raft::update_device(G.data(), G2_host, 4, resource::get_cuda_stream(handle));
     std::vector<cublasFillMode_t> fillmode{CUBLAS_FILL_MODE_LOWER, CUBLAS_FILL_MODE_UPPER};
     for (auto uplo : fillmode) {
-      raft::copy(L.data(), G.data(), 4, handle.get_stream());
+      raft::copy(L.data(), G.data(), 4, resource::get_cuda_stream(handle));
       ASSERT_NO_THROW(raft::linalg::choleskyRank1Update(
-        handle, L.data(), 1, 2, workspace.data(), &Lwork, uplo, handle.get_stream()));
-      ASSERT_THROW(raft::linalg::choleskyRank1Update(
-                     handle, L.data(), 2, 2, workspace.data(), &Lwork, uplo, handle.get_stream()),
+        handle, L.data(), 1, 2, workspace.data(), &Lwork, uplo, resource::get_cuda_stream(handle)));
+      ASSERT_THROW(raft::linalg::choleskyRank1Update(handle,
+                                                     L.data(),
+                                                     2,
+                                                     2,
+                                                     workspace.data(),
+                                                     &Lwork,
+                                                     uplo,
+                                                     resource::get_cuda_stream(handle)),
                    raft::exception);
 
       math_t eps = std::numeric_limits<math_t>::epsilon();
-      ASSERT_NO_THROW(raft::linalg::choleskyRank1Update(
-        handle, L.data(), 2, 2, workspace.data(), &Lwork, uplo, handle.get_stream(), eps));
+      ASSERT_NO_THROW(raft::linalg::choleskyRank1Update(handle,
+                                                        L.data(),
+                                                        2,
+                                                        2,
+                                                        workspace.data(),
+                                                        &Lwork,
+                                                        uplo,
+                                                        resource::get_cuda_stream(handle),
+                                                        eps));
     }
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   cusolverDnHandle_t solver_handle;
diff --git a/cpp/test/linalg/coalesced_reduction.cu b/cpp/test/linalg/coalesced_reduction.cu
index 1309d4c9c1..6d78788aef 100644
--- a/cpp/test/linalg/coalesced_reduction.cu
+++ b/cpp/test/linalg/coalesced_reduction.cu
@@ -18,6 +18,7 @@
 #include "reduce.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/coalesced_reduction.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -43,12 +44,8 @@ template <typename T>
 // for an extended __device__ lambda cannot have private or protected access
 // within its class
 template <typename T>
-void coalescedReductionLaunch(const raft::device_resources& handle,
-                              T* dots,
-                              const T* data,
-                              int cols,
-                              int rows,
-                              bool inplace = false)
+void coalescedReductionLaunch(
+  const raft::resources& handle, T* dots, const T* data, int cols, int rows, bool inplace = false)
 {
   auto dots_view = raft::make_device_vector_view(dots, rows);
   auto data_view = raft::make_device_matrix_view(data, rows, cols);
@@ -60,7 +57,7 @@ class coalescedReductionTest : public ::testing::TestWithParam<coalescedReductio
  public:
   coalescedReductionTest()
     : params(::testing::TestWithParam<coalescedReductionInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       dots_exp(params.rows * params.cols, stream),
       dots_act(params.rows * params.cols, stream)
@@ -101,11 +98,11 @@ class coalescedReductionTest : public ::testing::TestWithParam<coalescedReductio
     coalescedReductionLaunch(handle, dots_act.data(), data.data(), cols, rows);
     coalescedReductionLaunch(handle, dots_act.data(), data.data(), cols, rows, true);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   coalescedReductionInputs<T> params;
diff --git a/cpp/test/linalg/divide.cu b/cpp/test/linalg/divide.cu
index 6188e891d5..1e3b26fe8b 100644
--- a/cpp/test/linalg/divide.cu
+++ b/cpp/test/linalg/divide.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include "unary_op.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/divide.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -45,7 +46,7 @@ class DivideTest : public ::testing::TestWithParam<raft::linalg::UnaryOpInputs<T
  public:
   DivideTest()
     : params(::testing::TestWithParam<raft::linalg::UnaryOpInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.len, stream),
       out_ref(params.len, stream),
       out(params.len, stream)
@@ -63,11 +64,11 @@ class DivideTest : public ::testing::TestWithParam<raft::linalg::UnaryOpInputs<T
     auto in_view     = raft::make_device_vector_view<const T>(in.data(), len);
     auto scalar_view = raft::make_host_scalar_view<const T>(&params.scalar);
     divide_scalar(handle, in_view, out_view, scalar_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   UnaryOpInputs<T> params;
diff --git a/cpp/test/linalg/dot.cu b/cpp/test/linalg/dot.cu
index 8b8ca374d7..dd45a88375 100644
--- a/cpp/test/linalg/dot.cu
+++ b/cpp/test/linalg/dot.cu
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/dot.cuh>
 
 #include "../test_utils.cuh"
@@ -57,8 +58,8 @@ class DotTest : public ::testing::TestWithParam<DotInputs<T>> {
   {
     params = ::testing::TestWithParam<DotInputs<T>>::GetParam();
 
-    raft::device_resources handle;
-    cudaStream_t stream = handle.get_stream();
+    raft::resources handle;
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     raft::random::RngState r(params.seed);
 
@@ -70,13 +71,13 @@ class DotTest : public ::testing::TestWithParam<DotInputs<T>> {
     uniform(handle, r, x.data(), x_len, T(-1.0), T(1.0));
     uniform(handle, r, y.data(), y_len, T(-1.0), T(1.0));
 
-    rmm::device_scalar<T> ref(0, handle.get_stream());
+    rmm::device_scalar<T> ref(0, resource::get_cuda_stream(handle));
     naiveDot<<<256, 256, 0, stream>>>(
       params.len, x.data(), params.incx, y.data(), params.incy, ref.data());
     raft::update_host(&ref_output, ref.data(), 1, stream);
 
     // Test out both the device and host api's
-    rmm::device_scalar<T> out(0, handle.get_stream());
+    rmm::device_scalar<T> out(0, resource::get_cuda_stream(handle));
     auto device_out_view = make_device_scalar_view<T, IndexType>(out.data());
     auto host_out_view   = make_host_scalar_view<T, IndexType>(&host_output);
 
@@ -106,7 +107,7 @@ class DotTest : public ::testing::TestWithParam<DotInputs<T>> {
       dot(handle, x_view, y_view, host_out_view);
     }
     raft::update_host(&device_output, out.data(), 1, stream);
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
   void TearDown() override {}
diff --git a/cpp/test/linalg/eig.cu b/cpp/test/linalg/eig.cu
index 99d6cd0a31..49990ce300 100644
--- a/cpp/test/linalg/eig.cu
+++ b/cpp/test/linalg/eig.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/eig.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -45,7 +46,7 @@ class EigTest : public ::testing::TestWithParam<EigInputs<T>> {
  public:
   EigTest()
     : params(::testing::TestWithParam<EigInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       cov_matrix(params.len, stream),
       eig_vectors(params.len, stream),
       eig_vectors_jacobi(params.len, stream),
@@ -137,11 +138,11 @@ class EigTest : public ::testing::TestWithParam<EigInputs<T>> {
                eig_vals_jacobi_large_view,
                tol,
                sweeps);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   EigInputs<T> params;
diff --git a/cpp/test/linalg/eig_sel.cu b/cpp/test/linalg/eig_sel.cu
index 24e8e83832..a067cd140e 100644
--- a/cpp/test/linalg/eig_sel.cu
+++ b/cpp/test/linalg/eig_sel.cu
@@ -18,6 +18,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/eig.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -44,7 +45,7 @@ class EigSelTest : public ::testing::TestWithParam<EigSelInputs<T>> {
  public:
   EigSelTest()
     : params(::testing::TestWithParam<EigSelInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       cov_matrix(params.len, stream),
       eig_vectors(params.n_eigen_vals * params.n, stream),
       eig_vectors_ref(params.n_eigen_vals * params.n, stream),
@@ -95,11 +96,11 @@ class EigSelTest : public ::testing::TestWithParam<EigSelInputs<T>> {
                                    eig_vals_view,
                                    static_cast<std::size_t>(params.n_eigen_vals),
                                    EigVecMemUsage::OVERWRITE_INPUT);
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   EigSelInputs<T> params;
diff --git a/cpp/test/linalg/eigen_solvers.cu b/cpp/test/linalg/eigen_solvers.cu
index ca34b0c3a4..cf75ff89bf 100644
--- a/cpp/test/linalg/eigen_solvers.cu
+++ b/cpp/test/linalg/eigen_solvers.cu
@@ -14,8 +14,9 @@
  * limitations under the License.
  */
 
-#include <raft/core/device_resources.hpp>
 #include <raft/core/nvtx.hpp>
+#include <raft/core/resource/device_id.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/spectral/eigen_solvers.cuh>
 #include <raft/spectral/partition.cuh>
 
@@ -36,13 +37,8 @@ TEST(Raft, EigenSolvers)
   using index_type = int;
   using value_type = double;
 
-  raft::device_resources h;
-  ASSERT_EQ(0,
-            h.
-
-            get_device()
-
-  );
+  raft::resources h;
+  ASSERT_EQ(0, resource::get_device_id(h));
 
   index_type* ro{nullptr};
   index_type* ci{nullptr};
@@ -82,11 +78,8 @@ TEST(Raft, SpectralSolvers)
   using index_type = int;
   using value_type = double;
 
-  raft::device_resources h;
-  ASSERT_EQ(0,
-            h.
-
-            get_device()
+  raft::resources h;
+  ASSERT_EQ(0, resource::get_device_id(h)
 
   );
 
diff --git a/cpp/test/linalg/eltwise.cu b/cpp/test/linalg/eltwise.cu
index d8c72991c3..6bb0960990 100644
--- a/cpp/test/linalg/eltwise.cu
+++ b/cpp/test/linalg/eltwise.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/eltwise.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -60,7 +61,7 @@ class ScalarMultiplyTest : public ::testing::TestWithParam<ScalarMultiplyInputs<
  public:
   ScalarMultiplyTest()
     : params(::testing::TestWithParam<ScalarMultiplyInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(len, stream),
       out_ref(len, stream),
       out(len, stream)
@@ -76,11 +77,11 @@ class ScalarMultiplyTest : public ::testing::TestWithParam<ScalarMultiplyInputs<
     uniform(handle, r, in, len, T(-1.0), T(1.0));
     naiveScale(out_ref, in, scalar, len, stream);
     scalarMultiply(out, in, scalar, len, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   ScalarMultiplyInputs<T> params;
@@ -146,7 +147,7 @@ class EltwiseAddTest : public ::testing::TestWithParam<EltwiseAddInputs<T>> {
  public:
   EltwiseAddTest()
     : params(::testing::TestWithParam<EltwiseAddInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in1(params.len, stream),
       in2(params.len, stream),
       out_ref(params.len, stream),
@@ -164,11 +165,11 @@ class EltwiseAddTest : public ::testing::TestWithParam<EltwiseAddInputs<T>> {
     uniform(handle, r, in2, len, T(-1.0), T(1.0));
     naiveAdd(out_ref, in1, in2, len, stream);
     eltwiseAdd(out, in1, in2, len, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   EltwiseAddInputs<T> params;
diff --git a/cpp/test/linalg/gemm_layout.cu b/cpp/test/linalg/gemm_layout.cu
index 47b7e22d5d..898c8ad5aa 100644
--- a/cpp/test/linalg/gemm_layout.cu
+++ b/cpp/test/linalg/gemm_layout.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/gemm.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -63,8 +64,8 @@ class GemmLayoutTest : public ::testing::TestWithParam<GemmLayoutInputs<T>> {
   {
     params = ::testing::TestWithParam<GemmLayoutInputs<T>>::GetParam();
 
-    raft::device_resources handle;
-    cudaStream_t stream = handle.get_stream();
+    raft::resources handle;
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     raft::random::RngState r(params.seed);
 
@@ -123,7 +124,7 @@ class GemmLayoutTest : public ::testing::TestWithParam<GemmLayoutInputs<T>> {
       gemm(handle, x_view_row_major, y_view_row_major, z_view_row_major);
     }
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
   void TearDown() override
diff --git a/cpp/test/linalg/gemv.cu b/cpp/test/linalg/gemv.cu
index b4f338fdd1..beedb1894a 100644
--- a/cpp/test/linalg/gemv.cu
+++ b/cpp/test/linalg/gemv.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/gemv.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -85,8 +86,8 @@ class GemvTest : public ::testing::TestWithParam<GemvInputs<T>> {
   {
     params = ::testing::TestWithParam<GemvInputs<T>>::GetParam();
 
-    raft::device_resources handle;
-    cudaStream_t stream = handle.get_stream();
+    raft::resources handle;
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     raft::random::RngState r(params.seed);
 
@@ -123,7 +124,7 @@ class GemvTest : public ::testing::TestWithParam<GemvInputs<T>> {
       gemv(handle, A_col_major, x_view, y_view);
     }
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
   void TearDown() override {}
diff --git a/cpp/test/linalg/map.cu b/cpp/test/linalg/map.cu
index 8f2c3ed372..97f13c66db 100644
--- a/cpp/test/linalg/map.cu
+++ b/cpp/test/linalg/map.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/eltwise.cuh>
 #include <raft/linalg/map.cuh>
 #include <raft/matrix/init.cuh>
@@ -89,7 +90,8 @@ void mapLaunch(OutType* out,
                IdxType len,
                cudaStream_t stream)
 {
-  raft::device_resources handle{stream};
+  raft::resources handle;
+  resource::set_cuda_stream(handle, stream);
   auto out_view = raft::make_device_vector_view(out, len);
   auto in1_view = raft::make_device_vector_view(in1, len);
   auto in2_view = raft::make_device_vector_view(in2, len);
@@ -132,7 +134,7 @@ class MapTest : public ::testing::TestWithParam<MapInputs<InType, IdxType, OutTy
  public:
   MapTest()
     : params(::testing::TestWithParam<MapInputs<InType, IdxType, OutType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in1(params.len, stream),
       in2(params.len, stream),
       in3(params.len, stream),
@@ -180,7 +182,7 @@ class MapTest : public ::testing::TestWithParam<MapInputs<InType, IdxType, OutTy
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MapInputs<InType, IdxType, OutType> params;
@@ -193,7 +195,7 @@ class MapOffsetTest : public ::testing::TestWithParam<MapInputs<OutType, IdxType
  public:
   MapOffsetTest()
     : params(::testing::TestWithParam<MapInputs<OutType, IdxType, OutType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       out_ref(params.len, stream),
       out(params.len, stream)
   {
@@ -214,7 +216,7 @@ class MapOffsetTest : public ::testing::TestWithParam<MapInputs<OutType, IdxType
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MapInputs<OutType, IdxType, OutType> params;
diff --git a/cpp/test/linalg/map_then_reduce.cu b/cpp/test/linalg/map_then_reduce.cu
index ae5058ef3e..de54e08c80 100644
--- a/cpp/test/linalg/map_then_reduce.cu
+++ b/cpp/test/linalg/map_then_reduce.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <limits>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/map_reduce.cuh>
 #include <raft/linalg/map_then_reduce.cuh>
 #include <raft/random/rng.cuh>
@@ -74,7 +75,7 @@ class MapReduceTest : public ::testing::TestWithParam<MapReduceInputs<InType>> {
  public:
   MapReduceTest()
     : params(::testing::TestWithParam<MapReduceInputs<InType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.len, stream),
       out_ref(params.len, stream),
       out(params.len, stream)
@@ -89,11 +90,11 @@ class MapReduceTest : public ::testing::TestWithParam<MapReduceInputs<InType>> {
     auto len = params.len;
     uniform(handle, r, in.data(), len, InType(-1.0), InType(1.0));
     mapReduceLaunch(out_ref.data(), out.data(), in.data(), len, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MapReduceInputs<InType> params;
@@ -133,9 +134,10 @@ class MapGenericReduceTest : public ::testing::Test {
   using OutType = typename T::second_type;
 
  protected:
-  MapGenericReduceTest() : input(n, handle.get_stream()), output(handle.get_stream())
+  MapGenericReduceTest()
+    : input(n, resource::get_cuda_stream(handle)), output(resource::get_cuda_stream(handle))
   {
-    initInput(input.data(), input.size(), handle.get_stream());
+    initInput(input.data(), input.size(), resource::get_cuda_stream(handle));
   }
 
  public:
@@ -144,9 +146,9 @@ class MapGenericReduceTest : public ::testing::Test {
     raft::random::RngState r(137);
     uniform(handle, r, input, n, InType(2), InType(3));
     InType val = 1;
-    raft::update_device(input + 42, &val, 1, handle.get_stream());
+    raft::update_device(input + 42, &val, 1, resource::get_cuda_stream(handle));
     val = 5;
-    raft::update_device(input + 337, &val, 1, handle.get_stream());
+    raft::update_device(input + 337, &val, 1, resource::get_cuda_stream(handle));
   }
 
   void testMin()
@@ -157,7 +159,7 @@ class MapGenericReduceTest : public ::testing::Test {
       input.data(), static_cast<std::uint32_t>(input.size()));
     map_reduce(handle, input_view, output_view, neutral, raft::identity_op{}, cub::Min());
     EXPECT_TRUE(raft::devArrMatch(
-      OutType(1), output.data(), 1, raft::Compare<OutType>(), handle.get_stream()));
+      OutType(1), output.data(), 1, raft::Compare<OutType>(), resource::get_cuda_stream(handle)));
   }
   void testMax()
   {
@@ -167,11 +169,11 @@ class MapGenericReduceTest : public ::testing::Test {
       input.data(), static_cast<std::uint32_t>(input.size()));
     map_reduce(handle, input_view, output_view, neutral, raft::identity_op{}, cub::Max());
     EXPECT_TRUE(raft::devArrMatch(
-      OutType(5), output.data(), 1, raft::Compare<OutType>(), handle.get_stream()));
+      OutType(5), output.data(), 1, raft::Compare<OutType>(), resource::get_cuda_stream(handle)));
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   int n = 1237;
diff --git a/cpp/test/linalg/matrix_vector.cu b/cpp/test/linalg/matrix_vector.cu
index 602d01f60c..f56f51b48b 100644
--- a/cpp/test/linalg/matrix_vector.cu
+++ b/cpp/test/linalg/matrix_vector.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/matrix_vector.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -46,7 +47,7 @@ template <typename T, typename IdxType>
 // for an extended __device__ lambda cannot have private or protected access
 // within its class
 template <typename T, typename IdxType>
-void matrix_vector_op_launch(const raft::device_resources& handle,
+void matrix_vector_op_launch(const raft::resources& handle,
                              T* in,
                              const T* vec1,
                              IdxType D,
@@ -98,7 +99,7 @@ void matrix_vector_op_launch(const raft::device_resources& handle,
 }
 
 template <typename T, typename IdxType>
-void naive_matrix_vector_op_launch(const raft::device_resources& handle,
+void naive_matrix_vector_op_launch(const raft::resources& handle,
                                    T* in,
                                    const T* vec1,
                                    IdxType D,
@@ -107,7 +108,7 @@ void naive_matrix_vector_op_launch(const raft::device_resources& handle,
                                    bool bcast_along_rows,
                                    int operation_type)
 {
-  auto stream                       = handle.get_stream();
+  auto stream                       = resource::get_cuda_stream(handle);
   auto operation_bin_mult_skip_zero = [] __device__(T mat_element, T vec_element) {
     if (vec_element != T(0)) {
       return mat_element * vec_element;
@@ -144,7 +145,7 @@ class MatrixVectorTest : public ::testing::TestWithParam<MatrixVectorInputs<T, I
  public:
   MatrixVectorTest()
     : params(::testing::TestWithParam<MatrixVectorInputs<T, IdxType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.rows * params.cols, stream),
       out_ref(params.rows * params.cols, stream),
       out(params.rows * params.cols, stream),
@@ -161,8 +162,8 @@ class MatrixVectorTest : public ::testing::TestWithParam<MatrixVectorInputs<T, I
     IdxType vecLen = params.bcast_along_rows ? D : N;
     uniform(handle, r, in.data(), len, (T)-1.0, (T)1.0);
     uniform(handle, r, vec1.data(), vecLen, (T)-1.0, (T)1.0);
-    raft::copy(out_ref.data(), in.data(), len, handle.get_stream());
-    raft::copy(out.data(), in.data(), len, handle.get_stream());
+    raft::copy(out_ref.data(), in.data(), len, resource::get_cuda_stream(handle));
+    raft::copy(out.data(), in.data(), len, resource::get_cuda_stream(handle));
     naive_matrix_vector_op_launch(handle,
                                   out_ref.data(),
                                   vec1.data(),
@@ -179,11 +180,11 @@ class MatrixVectorTest : public ::testing::TestWithParam<MatrixVectorInputs<T, I
                             params.row_major,
                             params.bcast_along_rows,
                             params.operation_type);
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MatrixVectorInputs<T, IdxType> params;
diff --git a/cpp/test/linalg/matrix_vector_op.cu b/cpp/test/linalg/matrix_vector_op.cu
index 5ba178e212..c39e9dd164 100644
--- a/cpp/test/linalg/matrix_vector_op.cu
+++ b/cpp/test/linalg/matrix_vector_op.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include "matrix_vector_op.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/matrix_vector_op.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -41,7 +42,7 @@ template <typename IdxType>
 }
 
 template <typename T, typename LenT>
-inline void gen_uniform(const raft::device_resources& handle,
+inline void gen_uniform(const raft::resources& handle,
                         raft::random::RngState& rng,
                         T* ptr,
                         LenT len)
@@ -57,7 +58,7 @@ inline void gen_uniform(const raft::device_resources& handle,
 // for an extended __device__ lambda cannot have private or protected access
 // within its class
 template <typename OpT, typename MatT, typename IdxType, typename Vec1T, typename Vec2T>
-void matrixVectorOpLaunch(const raft::device_resources& handle,
+void matrixVectorOpLaunch(const raft::resources& handle,
                           MatT* out,
                           const MatT* in,
                           const Vec1T* vec1,
@@ -101,7 +102,7 @@ template <typename OpT,
 class MatVecOpTest : public ::testing::TestWithParam<MatVecOpInputs<IdxType>> {
  public:
   MatVecOpTest()
-    : stream(handle.get_stream()),
+    : stream(resource::get_cuda_stream(handle)),
       params(::testing::TestWithParam<MatVecOpInputs<IdxType>>::GetParam()),
       vec_size(params.bcastAlongRows ? params.cols : params.rows),
       in(params.rows * params.cols + params.inAlignOffset, stream),
@@ -155,11 +156,11 @@ class MatVecOpTest : public ::testing::TestWithParam<MatVecOpInputs<IdxType>> {
                               params.rows,
                               params.rowMajor,
                               params.bcastAlongRows);
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MatVecOpInputs<IdxType> params;
diff --git a/cpp/test/linalg/mean_squared_error.cu b/cpp/test/linalg/mean_squared_error.cu
index aa1c314e68..1eb774053c 100644
--- a/cpp/test/linalg/mean_squared_error.cu
+++ b/cpp/test/linalg/mean_squared_error.cu
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/mean_squared_error.cuh>
 
 #include "../test_utils.cuh"
@@ -49,17 +50,17 @@ class MeanSquaredErrorTest : public ::testing::TestWithParam<MeanSquaredErrorInp
  protected:
   MeanSquaredErrorInputs<T> params;
 
-  raft::device_resources handle;
+  raft::resources handle;
   rmm::device_scalar<T> output;
   rmm::device_scalar<T> refoutput;
 
  public:
   MeanSquaredErrorTest()
     : testing::TestWithParam<MeanSquaredErrorInputs<T>>(),
-      output(0, handle.get_stream()),
-      refoutput(0, handle.get_stream())
+      output(0, resource::get_cuda_stream(handle)),
+      refoutput(0, resource::get_cuda_stream(handle))
   {
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
@@ -67,7 +68,7 @@ class MeanSquaredErrorTest : public ::testing::TestWithParam<MeanSquaredErrorInp
   {
     params = ::testing::TestWithParam<MeanSquaredErrorInputs<T>>::GetParam();
 
-    cudaStream_t stream = handle.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     raft::random::RngState r(params.seed);
 
@@ -75,7 +76,7 @@ class MeanSquaredErrorTest : public ::testing::TestWithParam<MeanSquaredErrorInp
     rmm::device_uvector<T> b(params.len, stream);
     uniform(handle, r, a.data(), params.len, T(-1.0), T(1.0));
     uniform(handle, r, b.data(), params.len, T(-1.0), T(1.0));
-    handle.sync_stream();
+    resource::sync_stream(handle);
 
     mean_squared_error<T, std::uint32_t, T>(handle,
                                             make_device_vector_view<const T>(a.data(), params.len),
@@ -85,7 +86,7 @@ class MeanSquaredErrorTest : public ::testing::TestWithParam<MeanSquaredErrorInp
 
     naiveMeanSquaredError<<<256, 256, 0, stream>>>(
       params.len, a.data(), b.data(), params.weight, refoutput.data());
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
   void TearDown() override {}
diff --git a/cpp/test/linalg/multiply.cu b/cpp/test/linalg/multiply.cu
index b8af7515e0..0cd890b575 100644
--- a/cpp/test/linalg/multiply.cu
+++ b/cpp/test/linalg/multiply.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include "unary_op.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/multiply.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -29,7 +30,7 @@ class MultiplyTest : public ::testing::TestWithParam<UnaryOpInputs<T>> {
  public:
   MultiplyTest()
     : params(::testing::TestWithParam<UnaryOpInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.len, stream),
       out_ref(params.len, stream),
       out(params.len, stream)
@@ -48,11 +49,11 @@ class MultiplyTest : public ::testing::TestWithParam<UnaryOpInputs<T>> {
     auto in_view     = raft::make_device_vector_view<const T>(in.data(), len);
     auto scalar_view = raft::make_host_scalar_view<const T>(&params.scalar);
     multiply_scalar(handle, in_view, out_view, scalar_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   UnaryOpInputs<T> params;
diff --git a/cpp/test/linalg/norm.cu b/cpp/test/linalg/norm.cu
index 6dfeced6e0..41bc12e1f9 100644
--- a/cpp/test/linalg/norm.cu
+++ b/cpp/test/linalg/norm.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/norm.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -78,7 +79,7 @@ class RowNormTest : public ::testing::TestWithParam<NormInputs<T, IdxT>> {
  public:
   RowNormTest()
     : params(::testing::TestWithParam<NormInputs<T, IdxT>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       dots_exp(params.rows, stream),
       dots_act(params.rows, stream)
@@ -109,11 +110,11 @@ class RowNormTest : public ::testing::TestWithParam<NormInputs<T, IdxT>> {
         norm(handle, input_col_major, output_view, params.type, Apply::ALONG_ROWS);
       }
     }
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   NormInputs<T, IdxT> params;
@@ -152,7 +153,7 @@ class ColNormTest : public ::testing::TestWithParam<NormInputs<T, IdxT>> {
  public:
   ColNormTest()
     : params(::testing::TestWithParam<NormInputs<T, IdxT>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       dots_exp(params.cols, stream),
       dots_act(params.cols, stream)
@@ -186,11 +187,11 @@ class ColNormTest : public ::testing::TestWithParam<NormInputs<T, IdxT>> {
         norm(handle, input_col_major, output_view, params.type, Apply::ALONG_COLUMNS);
       }
     }
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   NormInputs<T, IdxT> params;
diff --git a/cpp/test/linalg/normalize.cu b/cpp/test/linalg/normalize.cu
index 24f83a0d0a..63dbd9a19b 100644
--- a/cpp/test/linalg/normalize.cu
+++ b/cpp/test/linalg/normalize.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/matrix_vector_op.cuh>
 #include <raft/linalg/norm.cuh>
 #include <raft/linalg/normalize.cuh>
@@ -64,7 +65,7 @@ class RowNormalizeTest : public ::testing::TestWithParam<RowNormalizeInputs<T, I
  public:
   RowNormalizeTest()
     : params(::testing::TestWithParam<RowNormalizeInputs<T, IdxT>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       out_exp(params.rows * params.cols, stream),
       out_act(params.rows * params.cols, stream)
@@ -86,11 +87,11 @@ class RowNormalizeTest : public ::testing::TestWithParam<RowNormalizeInputs<T, I
       out_act.data(), params.rows, params.cols);
     raft::linalg::row_normalize(handle, input_view, output_view, params.norm_type);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RowNormalizeInputs<T, IdxT> params;
diff --git a/cpp/test/linalg/power.cu b/cpp/test/linalg/power.cu
index 20b1fa0e45..fd43315e36 100644
--- a/cpp/test/linalg/power.cu
+++ b/cpp/test/linalg/power.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/power.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -72,10 +73,10 @@ template <typename T>
 class PowerTest : public ::testing::TestWithParam<PowerInputs<T>> {
  protected:
   PowerTest()
-    : in1(0, handle.get_stream()),
-      in2(0, handle.get_stream()),
-      out_ref(0, handle.get_stream()),
-      out(0, handle.get_stream())
+    : in1(0, resource::get_cuda_stream(handle)),
+      in2(0, resource::get_cuda_stream(handle)),
+      out_ref(0, resource::get_cuda_stream(handle)),
+      out(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -85,7 +86,7 @@ class PowerTest : public ::testing::TestWithParam<PowerInputs<T>> {
     raft::random::RngState r(params.seed);
     int len = params.len;
 
-    cudaStream_t stream = handle.get_stream();
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     in1.resize(len, stream);
     in2.resize(len, stream);
@@ -109,11 +110,11 @@ class PowerTest : public ::testing::TestWithParam<PowerInputs<T>> {
     power(handle, const_in1_view, const_in2_view, in1_view);
     power_scalar(handle, const_in1_view, in1_view, scalar_view);
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   PowerInputs<T> params;
   rmm::device_uvector<T> in1, in2, out_ref, out;
   int device_count = 0;
diff --git a/cpp/test/linalg/reduce.cu b/cpp/test/linalg/reduce.cu
index 8cdeab5a94..fd1b4e7b45 100644
--- a/cpp/test/linalg/reduce.cu
+++ b/cpp/test/linalg/reduce.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <raft/core/detail/macros.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/reduce.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -74,7 +75,8 @@ void reduceLaunch(OutType* dots,
   auto input_view_col_major =
     raft::make_device_matrix_view<const InType, IdxType, raft::col_major>(data, rows, cols);
 
-  raft::device_resources handle{stream};
+  raft::resources handle;
+  resource::set_cuda_stream(handle, stream);
 
   if (rowMajor) {
     reduce(handle,
@@ -109,7 +111,7 @@ class ReduceTest : public ::testing::TestWithParam<ReduceInputs<InType, OutType,
  public:
   ReduceTest()
     : params(::testing::TestWithParam<ReduceInputs<InType, OutType, IdxType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       dots_exp(params.alongRows ? params.rows : params.cols, stream),
       dots_act(params.alongRows ? params.rows : params.cols, stream)
@@ -180,11 +182,11 @@ class ReduceTest : public ::testing::TestWithParam<ReduceInputs<InType, OutType,
                  reduce_op,
                  fin_op);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   ReduceInputs<InType, OutType, IdxType> params;
diff --git a/cpp/test/linalg/reduce_cols_by_key.cu b/cpp/test/linalg/reduce_cols_by_key.cu
index be9a8c063b..dbd54a350f 100644
--- a/cpp/test/linalg/reduce_cols_by_key.cu
+++ b/cpp/test/linalg/reduce_cols_by_key.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/reduce_cols_by_key.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -75,8 +76,8 @@ class ReduceColsTest : public ::testing::TestWithParam<ReduceColsInputs<T, IdxT>
   {
     params = ::testing::TestWithParam<ReduceColsInputs<T, IdxT>>::GetParam();
     raft::random::RngState r(params.seed);
-    raft::device_resources handle;
-    auto stream = handle.get_stream();
+    raft::resources handle;
+    auto stream = resource::get_cuda_stream(handle);
     auto nrows  = params.rows;
     auto ncols  = params.cols;
     auto nkeys  = params.nkeys;
diff --git a/cpp/test/linalg/reduce_rows_by_key.cu b/cpp/test/linalg/reduce_rows_by_key.cu
index 69bacb0631..6dbdc51f92 100644
--- a/cpp/test/linalg/reduce_rows_by_key.cu
+++ b/cpp/test/linalg/reduce_rows_by_key.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/reduce_rows_by_key.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -88,7 +89,7 @@ class ReduceRowTest : public ::testing::TestWithParam<ReduceRowsInputs<T>> {
  public:
   ReduceRowTest()
     : params(::testing::TestWithParam<ReduceRowsInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.nobs * params.cols, stream),
       out(params.nkeys * params.cols, stream),
       out_ref(params.nkeys * params.cols, stream),
@@ -140,12 +141,12 @@ class ReduceRowTest : public ::testing::TestWithParam<ReduceRowsInputs<T>> {
 
     reduce_rows_by_key(
       handle, input_view, keys_view, output_view, params.nkeys, scratch_buf_view, weights_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
   ReduceRowsInputs<T> params;
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
 
   int device_count = 0;
diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu
index 48a077aa26..0c66f47c7f 100644
--- a/cpp/test/linalg/rsvd.cu
+++ b/cpp/test/linalg/rsvd.cu
@@ -16,7 +16,8 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/rsvd.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -64,8 +65,8 @@ class RsvdTest : public ::testing::TestWithParam<RsvdInputs<T>> {
 
   void SetUp() override
   {
-    raft::device_resources handle;
-    stream = handle.get_stream();
+    raft::resources handle;
+    stream = resource::get_cuda_stream(handle);
 
     params = ::testing::TestWithParam<RsvdInputs<T>>::GetParam();
     // rSVD seems to be very sensitive to the random number sequence as well!
@@ -272,7 +273,7 @@ TEST_P(RsvdSanityCheckRightVecD, Result)
 typedef RsvdTest<float> RsvdTestSquareMatrixNormF;
 TEST_P(RsvdTestSquareMatrixNormF, Result)
 {
-  raft::device_resources handle;
+  raft::resources handle;
 
   ASSERT_TRUE(raft::linalg::evaluateSVDByL2Norm(handle,
                                                 A.data(),
@@ -283,13 +284,13 @@ TEST_P(RsvdTestSquareMatrixNormF, Result)
                                                 params.n_col,
                                                 params.k,
                                                 4 * params.tolerance,
-                                                handle.get_stream()));
+                                                resource::get_cuda_stream(handle)));
 }
 
 typedef RsvdTest<double> RsvdTestSquareMatrixNormD;
 TEST_P(RsvdTestSquareMatrixNormD, Result)
 {
-  raft::device_resources handle;
+  raft::resources handle;
 
   ASSERT_TRUE(raft::linalg::evaluateSVDByL2Norm(handle,
                                                 A.data(),
@@ -300,7 +301,7 @@ TEST_P(RsvdTestSquareMatrixNormD, Result)
                                                 params.n_col,
                                                 params.k,
                                                 4 * params.tolerance,
-                                                handle.get_stream()));
+                                                resource::get_cuda_stream(handle)));
 }
 
 INSTANTIATE_TEST_CASE_P(RsvdTests, RsvdSanityCheckValF, ::testing::ValuesIn(sanity_inputs_fx));
diff --git a/cpp/test/linalg/sqrt.cu b/cpp/test/linalg/sqrt.cu
index 7ee31da874..e70551fb93 100644
--- a/cpp/test/linalg/sqrt.cu
+++ b/cpp/test/linalg/sqrt.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/sqrt.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -56,13 +57,15 @@ template <typename T>
 class SqrtTest : public ::testing::TestWithParam<SqrtInputs<T>> {
  protected:
   SqrtTest()
-    : in1(0, handle.get_stream()), out_ref(0, handle.get_stream()), out(0, handle.get_stream())
+    : in1(0, resource::get_cuda_stream(handle)),
+      out_ref(0, resource::get_cuda_stream(handle)),
+      out(0, resource::get_cuda_stream(handle))
   {
   }
 
   void SetUp() override
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     params      = ::testing::TestWithParam<SqrtInputs<T>>::GetParam();
     raft::random::RngState r(params.seed);
     int len = params.len;
@@ -82,7 +85,7 @@ class SqrtTest : public ::testing::TestWithParam<SqrtInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   SqrtInputs<T> params;
   rmm::device_uvector<T> in1, out_ref, out;
   int device_count = 0;
diff --git a/cpp/test/linalg/strided_reduction.cu b/cpp/test/linalg/strided_reduction.cu
index c9b32c3585..06096a74a7 100644
--- a/cpp/test/linalg/strided_reduction.cu
+++ b/cpp/test/linalg/strided_reduction.cu
@@ -18,6 +18,7 @@
 #include "reduce.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/strided_reduction.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -37,7 +38,8 @@ template <typename T>
 void stridedReductionLaunch(
   T* dots, const T* data, int cols, int rows, bool inplace, cudaStream_t stream)
 {
-  raft::device_resources handle{stream};
+  raft::resources handle;
+  resource::set_cuda_stream(handle, stream);
   auto dots_view = raft::make_device_vector_view(dots, cols);
   auto data_view = raft::make_device_matrix_view(data, rows, cols);
   strided_reduction(handle, data_view, dots_view, (T)0, inplace, raft::sq_op{});
@@ -48,7 +50,7 @@ class stridedReductionTest : public ::testing::TestWithParam<stridedReductionInp
  public:
   stridedReductionTest()
     : params(::testing::TestWithParam<stridedReductionInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       dots_exp(params.cols, stream),  // expected dot products (from test)
       dots_act(params.cols, stream)   // actual dot products (from prim)
@@ -87,11 +89,11 @@ class stridedReductionTest : public ::testing::TestWithParam<stridedReductionInp
                           raft::identity_op{});
     stridedReductionLaunch(dots_act.data(), data.data(), cols, rows, false, stream);
     stridedReductionLaunch(dots_act.data(), data.data(), cols, rows, true, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   stridedReductionInputs<T> params;
diff --git a/cpp/test/linalg/subtract.cu b/cpp/test/linalg/subtract.cu
index 222e64fc3c..47a2aab82d 100644
--- a/cpp/test/linalg/subtract.cu
+++ b/cpp/test/linalg/subtract.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/subtract.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -73,7 +74,7 @@ class SubtractTest : public ::testing::TestWithParam<SubtractInputs<T>> {
  public:
   SubtractTest()
     : params(::testing::TestWithParam<SubtractInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in1(params.len, stream),
       in2(params.len, stream),
       out_ref(params.len, stream),
@@ -104,11 +105,11 @@ class SubtractTest : public ::testing::TestWithParam<SubtractInputs<T>> {
     subtract_scalar(handle, const_out_view, out_view, scalar_view);
     subtract(handle, const_in1_view, const_in2_view, in1_view);
     subtract_scalar(handle, const_in1_view, in1_view, scalar_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   SubtractInputs<T> params;
diff --git a/cpp/test/linalg/svd.cu b/cpp/test/linalg/svd.cu
index 9907172956..a092b16e22 100644
--- a/cpp/test/linalg/svd.cu
+++ b/cpp/test/linalg/svd.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/init.cuh>
 #include <raft/linalg/svd.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -44,7 +45,7 @@ class SvdTest : public ::testing::TestWithParam<SvdInputs<T>> {
  public:
   SvdTest()
     : params(::testing::TestWithParam<SvdInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.len, stream),
       left_eig_vectors_qr(params.n_row * params.n_col, stream),
       right_eig_vectors_trans_qr(params.n_col * params.n_col, stream),
@@ -96,7 +97,7 @@ class SvdTest : public ::testing::TestWithParam<SvdInputs<T>> {
                                sing_vals_qr_view,
                                left_eig_vectors_qr_view,
                                right_eig_vectors_trans_qr_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void SetUp() override
@@ -127,7 +128,7 @@ class SvdTest : public ::testing::TestWithParam<SvdInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   SvdInputs<T> params;
diff --git a/cpp/test/linalg/ternary_op.cu b/cpp/test/linalg/ternary_op.cu
index 3eadae95ae..7182f52fcd 100644
--- a/cpp/test/linalg/ternary_op.cu
+++ b/cpp/test/linalg/ternary_op.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/ternary_op.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -41,7 +42,7 @@ class ternaryOpTest : public ::testing::TestWithParam<BinaryOpInputs<T>> {
  public:
   ternaryOpTest()
     : params(::testing::TestWithParam<BinaryOpInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       out_add_ref(params.len, stream),
       out_add(params.len, stream),
       out_mul_ref(params.len, stream),
@@ -77,7 +78,7 @@ class ternaryOpTest : public ::testing::TestWithParam<BinaryOpInputs<T>> {
 
  protected:
   BinaryOpInputs<T> params;
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
 
   rmm::device_uvector<T> out_add_ref, out_add, out_mul_ref, out_mul;
diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu
index 6f5800dd8f..6579011856 100644
--- a/cpp/test/linalg/transpose.cu
+++ b/cpp/test/linalg/transpose.cu
@@ -15,8 +15,9 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/transpose.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -49,7 +50,7 @@ class TransposeTest : public ::testing::TestWithParam<TranposeInputs<T>> {
  public:
   TransposeTest()
     : params(::testing::TestWithParam<TranposeInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.len, stream),
       data_trans_ref(params.len, stream),
       data_trans(params.len, stream)
@@ -68,11 +69,11 @@ class TransposeTest : public ::testing::TestWithParam<TranposeInputs<T>> {
 
     transpose(handle, data.data(), data_trans.data(), params.n_row, params.n_col, stream);
     transpose(data.data(), params.n_row, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   TranposeInputs<T> params;
@@ -134,7 +135,7 @@ namespace {
  * @return The transposed matrix.
  */
 template <typename T, typename IndexType, typename LayoutPolicy>
-[[nodiscard]] auto transpose(raft::device_resources const& handle,
+[[nodiscard]] auto transpose(raft::resources const& handle,
                              device_matrix_view<T, IndexType, LayoutPolicy> in)
   -> std::enable_if_t<std::is_floating_point_v<T> &&
                         (std::is_same_v<LayoutPolicy, layout_c_contiguous> ||
@@ -159,7 +160,7 @@ template <typename T, typename IndexType, typename LayoutPolicy>
  * @return The transposed matrix.
  */
 template <typename T, typename IndexType>
-[[nodiscard]] auto transpose(raft::device_resources const& handle,
+[[nodiscard]] auto transpose(raft::resources const& handle,
                              device_matrix_view<T, IndexType, layout_stride> in)
   -> std::enable_if_t<std::is_floating_point_v<T>, device_matrix<T, IndexType, layout_stride>>
 {
@@ -189,7 +190,7 @@ template <typename T, typename IndexType>
 template <typename T, typename LayoutPolicy>
 void test_transpose_with_mdspan()
 {
-  raft::device_resources handle;
+  raft::resources handle;
   auto v = make_device_matrix<T, size_t, LayoutPolicy>(handle, 32, 3);
   T k{0};
   for (size_t i = 0; i < v.extent(0); ++i) {
@@ -224,7 +225,7 @@ namespace {
 template <typename T, typename LayoutPolicy>
 void test_transpose_submatrix()
 {
-  raft::device_resources handle;
+  raft::resources handle;
   auto v = make_device_matrix<T, size_t, LayoutPolicy>(handle, 32, 33);
   T k{0};
   size_t row_beg{3}, row_end{13}, col_beg{2}, col_end{11};
diff --git a/cpp/test/linalg/unary_op.cu b/cpp/test/linalg/unary_op.cu
index 278eac348b..ba1bdad2f1 100644
--- a/cpp/test/linalg/unary_op.cu
+++ b/cpp/test/linalg/unary_op.cu
@@ -17,7 +17,8 @@
 #include "../test_utils.cuh"
 #include "unary_op.cuh"
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -30,7 +31,7 @@ class UnaryOpTest : public ::testing::TestWithParam<UnaryOpInputs<InType, IdxTyp
  public:
   UnaryOpTest()
     : params(::testing::TestWithParam<UnaryOpInputs<InType, IdxType, OutType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.len, stream),
       out_ref(params.len, stream),
       out(params.len, stream)
@@ -43,7 +44,7 @@ class UnaryOpTest : public ::testing::TestWithParam<UnaryOpInputs<InType, IdxTyp
     raft::random::RngState r(params.seed);
     auto len = params.len;
     uniform(handle, r, in.data(), len, InType(-1.0), InType(1.0));
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   virtual void DoTest()
@@ -58,11 +59,11 @@ class UnaryOpTest : public ::testing::TestWithParam<UnaryOpInputs<InType, IdxTyp
              in_view,
              out_view,
              raft::compose_op(raft::cast_op<OutType>(), raft::mul_const_op<InType>(scalar)));
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   UnaryOpInputs<InType, IdxType, OutType> params;
@@ -74,10 +75,7 @@ class UnaryOpTest : public ::testing::TestWithParam<UnaryOpInputs<InType, IdxTyp
 // The enclosing parent function ("DoTest") for an extended __device__ lambda cannot have private or
 // protected access within its class
 template <typename InType, typename IdxType, typename OutType>
-void launchWriteOnlyUnaryOp(const raft::device_resources& handle,
-                            OutType* out,
-                            InType scalar,
-                            IdxType len)
+void launchWriteOnlyUnaryOp(const raft::resources& handle, OutType* out, InType scalar, IdxType len)
 {
   auto out_view = raft::make_device_vector_view(out, len);
   auto op       = [scalar] __device__(OutType * ptr, IdxType idx) {
@@ -96,7 +94,7 @@ class WriteOnlyUnaryOpTest : public UnaryOpTest<OutType, IdxType, OutType> {
     naiveScale(this->out_ref.data(), (OutType*)nullptr, scalar, len, this->stream);
 
     launchWriteOnlyUnaryOp(this->handle, this->out.data(), scalar, len);
-    this->handle.sync_stream(this->stream);
+    resource::sync_stream(this->handle, this->stream);
   }
 };
 
diff --git a/cpp/test/matrix/argmax.cu b/cpp/test/matrix/argmax.cu
index ec27b530d7..40ca358fe1 100644
--- a/cpp/test/matrix/argmax.cu
+++ b/cpp/test/matrix/argmax.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/argmax.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -52,22 +53,22 @@ class ArgMaxTest : public ::testing::TestWithParam<ArgMaxInputs<T, IdxT>> {
     raft::update_device(input.data_handle(),
                         params.input_matrix.data(),
                         params.input_matrix.size(),
-                        handle.get_stream());
+                        resource::get_cuda_stream(handle));
     raft::update_device(expected.data_handle(),
                         params.output_matrix.data(),
                         params.output_matrix.size(),
-                        handle.get_stream());
+                        resource::get_cuda_stream(handle));
 
     auto input_const_view = raft::make_device_matrix_view<const T, std::uint32_t, row_major>(
       input.data_handle(), input.extent(0), input.extent(1));
 
     raft::matrix::argmax(handle, input_const_view, output.view());
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   ArgMaxInputs<T, IdxT> params;
 
   raft::device_matrix<T, std::uint32_t, row_major> input;
@@ -88,7 +89,7 @@ TEST_P(ArgMaxTestF, Result)
                           output.data_handle(),
                           params.n_rows,
                           Compare<int>(),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 typedef ArgMaxTest<double, int> ArgMaxTestD;
@@ -98,7 +99,7 @@ TEST_P(ArgMaxTestD, Result)
                           output.data_handle(),
                           params.n_rows,
                           Compare<int>(),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 INSTANTIATE_TEST_SUITE_P(ArgMaxTest, ArgMaxTestF, ::testing::ValuesIn(inputsf));
diff --git a/cpp/test/matrix/argmin.cu b/cpp/test/matrix/argmin.cu
index 73f6123167..29013bbdd8 100644
--- a/cpp/test/matrix/argmin.cu
+++ b/cpp/test/matrix/argmin.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/argmin.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -52,22 +53,22 @@ class ArgMinTest : public ::testing::TestWithParam<ArgMinInputs<T, IdxT>> {
     raft::update_device(input.data_handle(),
                         params.input_matrix.data(),
                         params.input_matrix.size(),
-                        handle.get_stream());
+                        resource::get_cuda_stream(handle));
     raft::update_device(expected.data_handle(),
                         params.output_matrix.data(),
                         params.output_matrix.size(),
-                        handle.get_stream());
+                        resource::get_cuda_stream(handle));
 
     auto input_const_view = raft::make_device_matrix_view<const T, std::uint32_t, row_major>(
       input.data_handle(), input.extent(0), input.extent(1));
 
     raft::matrix::argmin(handle, input_const_view, output.view());
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   ArgMinInputs<T, IdxT> params;
 
   raft::device_matrix<T, std::uint32_t, row_major> input;
@@ -88,7 +89,7 @@ TEST_P(ArgMinTestF, Result)
                           output.data_handle(),
                           params.n_rows,
                           Compare<int>(),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 typedef ArgMinTest<double, int> ArgMinTestD;
@@ -98,7 +99,7 @@ TEST_P(ArgMinTestD, Result)
                           output.data_handle(),
                           params.n_rows,
                           Compare<int>(),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 INSTANTIATE_TEST_SUITE_P(ArgMinTest, ArgMinTestF, ::testing::ValuesIn(inputsf));
diff --git a/cpp/test/matrix/columnSort.cu b/cpp/test/matrix/columnSort.cu
index 9a65918f8f..1ea9b4ae67 100644
--- a/cpp/test/matrix/columnSort.cu
+++ b/cpp/test/matrix/columnSort.cu
@@ -19,7 +19,8 @@
 #include <gtest/gtest.h>
 #include <numeric>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/matrix/col_wise_sort.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
@@ -57,11 +58,11 @@ template <typename T>
 class ColumnSort : public ::testing::TestWithParam<columnSort<T>> {
  protected:
   ColumnSort()
-    : keyIn(0, handle.get_stream()),
-      keySorted(0, handle.get_stream()),
-      keySortGolden(0, handle.get_stream()),
-      valueOut(0, handle.get_stream()),
-      goldenValOut(0, handle.get_stream())
+    : keyIn(0, resource::get_cuda_stream(handle)),
+      keySorted(0, resource::get_cuda_stream(handle)),
+      keySortGolden(0, resource::get_cuda_stream(handle)),
+      valueOut(0, resource::get_cuda_stream(handle)),
+      goldenValOut(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -69,12 +70,12 @@ class ColumnSort : public ::testing::TestWithParam<columnSort<T>> {
   {
     params  = ::testing::TestWithParam<columnSort<T>>::GetParam();
     int len = params.n_row * params.n_col;
-    keyIn.resize(len, handle.get_stream());
-    valueOut.resize(len, handle.get_stream());
-    goldenValOut.resize(len, handle.get_stream());
+    keyIn.resize(len, resource::get_cuda_stream(handle));
+    valueOut.resize(len, resource::get_cuda_stream(handle));
+    goldenValOut.resize(len, resource::get_cuda_stream(handle));
     if (params.testKeys) {
-      keySorted.resize(len, handle.get_stream());
-      keySortGolden.resize(len, handle.get_stream());
+      keySorted.resize(len, resource::get_cuda_stream(handle));
+      keySortGolden.resize(len, resource::get_cuda_stream(handle));
     }
 
     std::vector<T> vals(len);
@@ -97,11 +98,13 @@ class ColumnSort : public ::testing::TestWithParam<columnSort<T>> {
       }
     }
 
-    raft::update_device(keyIn.data(), &vals[0], len, handle.get_stream());
-    raft::update_device(goldenValOut.data(), &cValGolden[0], len, handle.get_stream());
+    raft::update_device(keyIn.data(), &vals[0], len, resource::get_cuda_stream(handle));
+    raft::update_device(
+      goldenValOut.data(), &cValGolden[0], len, resource::get_cuda_stream(handle));
 
     if (params.testKeys)
-      raft::update_device(keySortGolden.data(), &cKeyGolden[0], len, handle.get_stream());
+      raft::update_device(
+        keySortGolden.data(), &cKeyGolden[0], len, resource::get_cuda_stream(handle));
 
     auto key_in_view = raft::make_device_matrix_view<const T, int, row_major>(
       keyIn.data(), params.n_row, params.n_col);
@@ -113,11 +116,11 @@ class ColumnSort : public ::testing::TestWithParam<columnSort<T>> {
     raft::matrix::sort_cols_per_row(
       handle, key_in_view, value_out_view, std::make_optional(key_sorted_view));
 
-    RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+    RAFT_CUDA_TRY(cudaStreamSynchronize(resource::get_cuda_stream(handle)));
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   columnSort<T> params;
   rmm::device_uvector<T> keyIn, keySorted, keySortGolden;
   rmm::device_uvector<int> valueOut, goldenValOut;  // valueOut are indexes
diff --git a/cpp/test/matrix/diagonal.cu b/cpp/test/matrix/diagonal.cu
index 118aa7988f..a75e5290ae 100644
--- a/cpp/test/matrix/diagonal.cu
+++ b/cpp/test/matrix/diagonal.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/diagonal.cuh>
 #include <raft/matrix/init.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -62,19 +63,19 @@ class DiagonalTest : public ::testing::TestWithParam<DiagonalInputs<T>> {
                        diag_expected.view(),
                        raft::make_host_scalar_view<T>(&diag_fill_scalar));
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
 
     raft::matrix::set_diagonal(handle, diag_expected_view, input.view());
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
 
     raft::matrix::get_diagonal(handle, input_view, diag_actual.view());
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   DiagonalInputs<T> params;
 
   int diag_size;
@@ -95,7 +96,7 @@ TEST_P(DiagonalTestF, Result)
                           diag_actual.data_handle(),
                           diag_size,
                           Compare<float>(),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 typedef DiagonalTest<double> DiagonalTestD;
@@ -105,7 +106,7 @@ TEST_P(DiagonalTestD, Result)
                           diag_actual.data_handle(),
                           diag_size,
                           Compare<double>(),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 
 INSTANTIATE_TEST_SUITE_P(DiagonalTest, DiagonalTestF, ::testing::ValuesIn(inputsf));
diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu
index 37c2067c77..cab96576d2 100644
--- a/cpp/test/matrix/gather.cu
+++ b/cpp/test/matrix/gather.cu
@@ -19,6 +19,7 @@
 #include <raft/core/cudart_utils.hpp>
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/gather.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -78,7 +79,7 @@ template <bool Conditional, bool MapTransform, typename MatrixT, typename MapT,
 class GatherTest : public ::testing::TestWithParam<GatherInputs<IdxT>> {
  protected:
   GatherTest()
-    : stream(handle.get_stream()),
+    : stream(resource::get_cuda_stream(handle)),
       params(::testing::TestWithParam<GatherInputs<IdxT>>::GetParam()),
       d_in(0, stream),
       d_out_exp(0, stream),
@@ -159,11 +160,11 @@ class GatherTest : public ::testing::TestWithParam<GatherInputs<IdxT>> {
       raft::matrix::gather(handle, in_view, map_view, out_view);
     }
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
   GatherInputs<IdxT> params;
   std::vector<MatrixT> h_in, h_out, h_stencil;
diff --git a/cpp/test/matrix/linewise_op.cu b/cpp/test/matrix/linewise_op.cu
index 04a8a91b01..714119086c 100644
--- a/cpp/test/matrix/linewise_op.cu
+++ b/cpp/test/matrix/linewise_op.cu
@@ -21,6 +21,7 @@
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/nvtx.hpp>
 #include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/matrix_vector_op.cuh>
 #include <raft/matrix/linewise_op.cuh>
 #include <raft/random/rng.cuh>
@@ -43,7 +44,7 @@ struct LinewiseTestParams {
 
 template <typename T, typename I, typename ParamsReader>
 struct LinewiseTest : public ::testing::TestWithParam<typename ParamsReader::Params> {
-  const raft::device_resources handle;
+  const raft::resources handle;
   const LinewiseTestParams params;
   rmm::cuda_stream_view stream;
 
@@ -52,7 +53,7 @@ struct LinewiseTest : public ::testing::TestWithParam<typename ParamsReader::Par
       params(
         ParamsReader::read(::testing::TestWithParam<typename ParamsReader::Params>::GetParam())),
       handle(),
-      stream(handle.get_stream())
+      stream(resource::get_cuda_stream(handle))
   {
   }
 
diff --git a/cpp/test/matrix/math.cu b/cpp/test/matrix/math.cu
index cd3d865d80..ecc18cd16a 100644
--- a/cpp/test/matrix/math.cu
+++ b/cpp/test/matrix/math.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/matrix/power.cuh>
@@ -120,7 +121,7 @@ class MathTest : public ::testing::TestWithParam<MathInputs<T>> {
  public:
   MathTest()
     : params(::testing::TestWithParam<MathInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in_power(params.len, stream),
       out_power_ref(params.len, stream),
       in_sqrt(params.len, stream),
@@ -203,11 +204,11 @@ class MathTest : public ::testing::TestWithParam<MathInputs<T>> {
     update_device(out_smallzero_ref.data(), in_small_val_zero_ref_h.data(), 4, stream);
     zero_small_values<T>(handle, in_smallzero_view, out_smallzero_view);
     zero_small_values<T>(handle, inout_smallzero_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MathInputs<T> params;
diff --git a/cpp/test/matrix/matrix.cu b/cpp/test/matrix/matrix.cu
index 07ab3c5ce4..a10280a30c 100644
--- a/cpp/test/matrix/matrix.cu
+++ b/cpp/test/matrix/matrix.cu
@@ -17,6 +17,8 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/device_mdarray.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <raft/matrix/copy.cuh>
 #include <raft/random/rng.cuh>
@@ -49,7 +51,7 @@ class MatrixTest : public ::testing::TestWithParam<MatrixInputs<T>> {
  public:
   MatrixTest()
     : params(::testing::TestWithParam<MatrixInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in1(params.n_row * params.n_col, stream),
       in2(params.n_row * params.n_col, stream),
       in1_revr(params.n_row * params.n_col, stream)
@@ -76,11 +78,11 @@ class MatrixTest : public ::testing::TestWithParam<MatrixInputs<T>> {
 
     auto out_trunc_view = raft::make_device_matrix_view<T, int, col_major>(outTrunc.data(), 3, 2);
     trunc_zero_origin<T, int>(handle, in1_view, out_trunc_view);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MatrixInputs<T> params;
@@ -123,16 +125,16 @@ class MatrixCopyRowsTest : public ::testing::Test {
 
  protected:
   MatrixCopyRowsTest()
-    : stream(handle.get_stream()),
-      input(n_cols * n_rows, handle.get_stream()),
-      indices(n_selected, handle.get_stream()),
-      output(n_cols * n_selected, handle.get_stream())
+    : stream(resource::get_cuda_stream(handle)),
+      input(n_cols * n_rows, resource::get_cuda_stream(handle)),
+      indices(n_selected, resource::get_cuda_stream(handle)),
+      output(n_cols * n_selected, resource::get_cuda_stream(handle))
   {
     raft::update_device(indices.data(), indices_host, n_selected, stream);
     // Init input array
     thrust::counting_iterator<idx_t> first(0);
     thrust::device_ptr<math_t> ptr(input.data());
-    thrust::copy(handle.get_thrust_policy(), first, first + n_cols * n_rows, ptr);
+    thrust::copy(resource::get_thrust_policy(handle), first, first + n_cols * n_rows, ptr);
   }
 
   void testCopyRows()
@@ -161,7 +163,7 @@ class MatrixCopyRowsTest : public ::testing::Test {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   int n_rows     = 10;
diff --git a/cpp/test/matrix/norm.cu b/cpp/test/matrix/norm.cu
index ed1c393c4f..e370b7ce0c 100644
--- a/cpp/test/matrix/norm.cu
+++ b/cpp/test/matrix/norm.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/norm.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -55,7 +56,7 @@ class NormTest : public ::testing::TestWithParam<NormInputs<T>> {
  public:
   NormTest()
     : params(::testing::TestWithParam<NormInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream)
   {
   }
@@ -70,11 +71,11 @@ class NormTest : public ::testing::TestWithParam<NormInputs<T>> {
     out_scalar_exp = naiveNorm(h_data.data(), cols, rows);
     auto input = raft::make_device_matrix_view<const T, int>(data.data(), params.rows, params.cols);
     out_scalar_act = l2_norm(handle, input);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   NormInputs<T> params;
diff --git a/cpp/test/matrix/reverse.cu b/cpp/test/matrix/reverse.cu
index f3929c582b..191fc50198 100644
--- a/cpp/test/matrix/reverse.cu
+++ b/cpp/test/matrix/reverse.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/reverse.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -73,7 +74,7 @@ class ReverseTest : public ::testing::TestWithParam<ReverseInputs<T>> {
  public:
   ReverseTest()
     : params(::testing::TestWithParam<ReverseInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream)
   {
   }
@@ -114,11 +115,11 @@ class ReverseTest : public ::testing::TestWithParam<ReverseInputs<T>> {
     }
 
     raft::update_host(act_result.data(), data.data(), len, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   ReverseInputs<T> params;
diff --git a/cpp/test/matrix/select_k.cu b/cpp/test/matrix/select_k.cu
index 7a8a5b7aa8..702fd1c407 100644
--- a/cpp/test/matrix/select_k.cu
+++ b/cpp/test/matrix/select_k.cu
@@ -15,10 +15,11 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft_internal/matrix/select_k.cuh>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/sparse/detail/utils.h>
 #include <raft/util/cudart_utils.hpp>
@@ -102,8 +103,8 @@ struct io_computed {
       default: break;
     }
 
-    device_resources handle{};
-    auto stream = handle.get_stream();
+    resources handle{};
+    auto stream = resource::get_cuda_stream(handle);
 
     rmm::device_uvector<KeyT> in_dists_d(in_dists_.size(), stream);
     rmm::device_uvector<IdxT> in_ids_d(in_ids_.size(), stream);
@@ -347,9 +348,9 @@ struct with_ref {
       auto algo = std::get<1>(ps);
       std::vector<KeyT> dists(spec.len * spec.batch_size);
 
-      raft::device_resources handle;
+      raft::resources handle;
       {
-        auto s = handle.get_stream();
+        auto s = resource::get_cuda_stream(handle);
         rmm::device_uvector<KeyT> dists_d(spec.len * spec.batch_size, s);
         raft::random::RngState r(42);
         normal(handle, r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0));
diff --git a/cpp/test/matrix/slice.cu b/cpp/test/matrix/slice.cu
index 58f849a87c..332db379b7 100644
--- a/cpp/test/matrix/slice.cu
+++ b/cpp/test/matrix/slice.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/slice.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -55,7 +56,7 @@ class SliceTest : public ::testing::TestWithParam<SliceInputs<T>> {
  public:
   SliceTest()
     : params(::testing::TestWithParam<SliceInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream)
   {
   }
@@ -90,11 +91,11 @@ class SliceTest : public ::testing::TestWithParam<SliceInputs<T>> {
     slice(handle, input, output, slice_coordinates(row1, col1, row2, col2));
 
     raft::update_host(act_result.data(), d_act_result.data(), d_act_result.size(), stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   SliceInputs<T> params;
diff --git a/cpp/test/matrix/triangular.cu b/cpp/test/matrix/triangular.cu
index 82b01181f5..00b8a89b02 100644
--- a/cpp/test/matrix/triangular.cu
+++ b/cpp/test/matrix/triangular.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/init.cuh>
 #include <raft/matrix/triangular.cuh>
 #include <raft/random/rng.cuh>
@@ -55,7 +56,7 @@ class TriangularTest : public ::testing::TestWithParam<TriangularInputs<T>> {
  public:
   TriangularTest()
     : params(::testing::TestWithParam<TriangularInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream)
   {
   }
@@ -87,11 +88,11 @@ class TriangularTest : public ::testing::TestWithParam<TriangularInputs<T>> {
     naive_triangular(h_data, exp_result, rows, cols);
 
     raft::update_host(act_result.data(), d_act_result.data(), k * k, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   TriangularInputs<T> params;
diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh
index 1096dc4fb0..2d161d3794 100644
--- a/cpp/test/neighbors/ann_cagra.cuh
+++ b/cpp/test/neighbors/ann_cagra.cuh
@@ -17,6 +17,7 @@
 
 #include "../test_utils.cuh"
 #include "ann_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft_internal/neighbors/naive_knn.cuh>
 
@@ -154,7 +155,7 @@ template <typename DistanceT, typename DataT, typename IdxT>
 class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
  public:
   AnnCagraTest()
-    : stream_(handle_.get_stream()),
+    : stream_(resource::get_cuda_stream(handle_)),
       ps(::testing::TestWithParam<AnnCagraInputs>::GetParam()),
       database(0, stream_),
       search_queries(0, stream_)
@@ -189,7 +190,7 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
                                         stream_);
       update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
       update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
-      handle_.sync_stream();
+      resource::sync_stream(handle_);
     }
 
     {
@@ -235,7 +236,7 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
 
         update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_);
         update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_);
-        handle_.sync_stream();
+        resource::sync_stream(handle_);
       }
       // for (int i = 0; i < ps.n_queries; i++) {
       //   //  std::cout << "query " << i << std::end;
@@ -282,18 +283,18 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
       r.uniformInt(database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20), stream_);
       r.uniformInt(search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20), stream_);
     }
-    handle_.sync_stream();
+    resource::sync_stream(handle_);
   }
 
   void TearDown() override
   {
-    handle_.sync_stream();
+    resource::sync_stream(handle_);
     database.resize(0, stream_);
     search_queries.resize(0, stream_);
   }
 
  private:
-  raft::device_resources handle_;
+  raft::resources handle_;
   rmm::cuda_stream_view stream_;
   AnnCagraInputs ps;
   rmm::device_uvector<DataT> database;
diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh
index 4d90c3d7e4..3f10f2cf40 100644
--- a/cpp/test/neighbors/ann_ivf_flat.cuh
+++ b/cpp/test/neighbors/ann_ivf_flat.cuh
@@ -17,6 +17,8 @@
 
 #include "../test_utils.cuh"
 #include "ann_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 
 #include <raft_internal/neighbors/naive_knn.cuh>
 
@@ -67,7 +69,7 @@ template <typename T, typename DataT, typename IdxT>
 class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
  public:
   AnnIVFFlatTest()
-    : stream_(handle_.get_stream()),
+    : stream_(resource::get_cuda_stream(handle_)),
       ps(::testing::TestWithParam<AnnIvfFlatInputs<IdxT>>::GetParam()),
       database(0, stream_),
       search_queries(0, stream_)
@@ -98,7 +100,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
                                 stream_);
       update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
       update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
-      handle_.sync_stream(stream_);
+      resource::sync_stream(handle_);
     }
 
     {
@@ -125,7 +127,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
                                ps.num_db_vecs,
                                ps.dim);
 
-        handle_.sync_stream(stream_);
+        resource::sync_stream(handle_);
         approx_knn_search(handle_,
                           distances_ivfflat_dev.data(),
                           indices_ivfflat_dev.data(),
@@ -136,7 +138,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
 
         update_host(distances_ivfflat.data(), distances_ivfflat_dev.data(), queries_size, stream_);
         update_host(indices_ivfflat.data(), indices_ivfflat_dev.data(), queries_size, stream_);
-        handle_.sync_stream(stream_);
+        resource::sync_stream(handle_);
       }
 
       ASSERT_TRUE(eval_neighbours(indices_naive,
@@ -165,10 +167,10 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
         auto idx = ivf_flat::build(handle_, index_params, database_view);
 
         rmm::device_uvector<IdxT> vector_indices(ps.num_db_vecs, stream_);
-        thrust::sequence(handle_.get_thrust_policy(),
+        thrust::sequence(resource::get_thrust_policy(handle_),
                          thrust::device_pointer_cast(vector_indices.data()),
                          thrust::device_pointer_cast(vector_indices.data() + ps.num_db_vecs));
-        handle_.sync_stream(stream_);
+        resource::sync_stream(handle_);
 
         IdxT half_of_data = ps.num_db_vecs / 2;
 
@@ -209,7 +211,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
 
         update_host(distances_ivfflat.data(), distances_ivfflat_dev.data(), queries_size, stream_);
         update_host(indices_ivfflat.data(), indices_ivfflat_dev.data(), queries_size, stream_);
-        handle_.sync_stream(stream_);
+        resource::sync_stream(handle_);
 
         // Test the centroid invariants
         if (index_2.adaptive_centers()) {
@@ -221,7 +223,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
             list_sizes.data(), index_2.list_sizes().data_handle(), index_2.n_lists(), stream_);
           raft::copy(
             list_indices.data(), index_2.inds_ptrs().data_handle(), index_2.n_lists(), stream_);
-          handle_.sync_stream(stream_);
+          resource::sync_stream(handle_);
           for (uint32_t l = 0; l < index_2.n_lists(); l++) {
             if (list_sizes[l] == 0) continue;
             rmm::device_uvector<float> cluster_data(list_sizes[l] * ps.dim, stream_);
@@ -274,18 +276,18 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
       r.uniformInt(database.data(), ps.num_db_vecs * ps.dim, DataT(1), DataT(20), stream_);
       r.uniformInt(search_queries.data(), ps.num_queries * ps.dim, DataT(1), DataT(20), stream_);
     }
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
   }
 
   void TearDown() override
   {
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
     database.resize(0, stream_);
     search_queries.resize(0, stream_);
   }
 
  private:
-  raft::device_resources handle_;
+  raft::resources handle_;
   rmm::cuda_stream_view stream_;
   AnnIvfFlatInputs<IdxT> ps;
   rmm::device_uvector<DataT> database;
diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh
index 90c66ace06..9a6e310303 100644
--- a/cpp/test/neighbors/ann_ivf_pq.cuh
+++ b/cpp/test/neighbors/ann_ivf_pq.cuh
@@ -17,6 +17,7 @@
 
 #include "../test_utils.cuh"
 #include "ann_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft_internal/neighbors/naive_knn.cuh>
 
@@ -115,7 +116,7 @@ inline auto operator<<(std::ostream& os, const ivf_pq_inputs& p) -> std::ostream
 
 template <typename T>
 void compare_vectors_l2(
-  const raft::device_resources& res, T a, T b, uint32_t label, double compression_ratio, double eps)
+  const raft::resources& res, T a, T b, uint32_t label, double compression_ratio, double eps)
 {
   auto n_rows = a.extent(0);
   auto dim    = a.extent(1);
@@ -130,7 +131,7 @@ void compare_vectors_l2(
     }
     return sqrt(d / double(dim));
   });
-  res.sync_stream();
+  resource::sync_stream(res);
   for (uint32_t i = 0; i < n_rows; i++) {
     double d = dist(i);
     // The theoretical estimate of the error is hard to come up with,
@@ -141,7 +142,7 @@ void compare_vectors_l2(
 }
 
 template <typename IdxT>
-auto min_output_size(const raft::device_resources& handle,
+auto min_output_size(const raft::resources& handle,
                      const ivf_pq::index<IdxT>& index,
                      uint32_t n_probes) -> IdxT
 {
@@ -157,7 +158,7 @@ template <typename EvalT, typename DataT, typename IdxT>
 class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
  public:
   ivf_pq_test()
-    : stream_(handle_.get_stream()),
+    : stream_(resource::get_cuda_stream(handle_)),
       ps(::testing::TestWithParam<ivf_pq_inputs>::GetParam()),
       database(0, stream_),
       search_queries(0, stream_)
@@ -177,7 +178,7 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
       r.uniformInt(database.data(), ps.num_db_vecs * ps.dim, DataT(1), DataT(20), stream_);
       r.uniformInt(search_queries.data(), ps.num_queries * ps.dim, DataT(1), DataT(20), stream_);
     }
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
   }
 
   void calc_ref()
@@ -199,7 +200,7 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
     update_host(distances_ref.data(), distances_naive_dev.data(), queries_size, stream_);
     indices_ref.resize(queries_size);
     update_host(indices_ref.data(), indices_naive_dev.data(), queries_size, stream_);
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
   }
 
   auto build_only()
@@ -216,7 +217,7 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
   {
     auto db_indices = make_device_vector<IdxT>(handle_, ps.num_db_vecs);
     linalg::map_offset(handle_, db_indices.view(), identity_op{});
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
     auto size_1 = IdxT(ps.num_db_vecs) / 2;
     auto size_2 = IdxT(ps.num_db_vecs) - size_1;
     auto vecs_1 = database.data();
@@ -415,7 +416,7 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
 
     update_host(distances_ivf_pq.data(), distances_ivf_pq_dev.data(), queries_size, stream_);
     update_host(indices_ivf_pq.data(), indices_ivf_pq_dev.data(), queries_size, stream_);
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
 
     // A very conservative lower bound on recall
     double min_recall =
@@ -479,13 +480,13 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
   void TearDown() override  // NOLINT
   {
     cudaGetLastError();
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
     database.resize(0, stream_);
     search_queries.resize(0, stream_);
   }
 
  private:
-  raft::device_resources handle_;
+  raft::resources handle_;
   rmm::cuda_stream_view stream_;
   ivf_pq_inputs ps;                           // NOLINT
   rmm::device_uvector<DataT> database;        // NOLINT
diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh
index 67df5f2abe..0e54e29c01 100644
--- a/cpp/test/neighbors/ann_utils.cuh
+++ b/cpp/test/neighbors/ann_utils.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/device_mdarray.hpp>  // raft::make_device_matrix
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/matrix/copy.cuh>
 #include <raft/matrix/detail/select_k.cuh>
@@ -170,7 +171,7 @@ auto eval_neighbours(const std::vector<T>& expected_idx,
 }
 
 template <typename T, typename DistT, typename IdxT>
-auto eval_distances(raft::device_resources const& handle,
+auto eval_distances(raft::resources const& handle,
                     const T* x,              // dataset, n_rows * n_cols
                     const T* queries,        // n_queries * n_cols
                     const IdxT* neighbors,   // n_queries * k
@@ -199,8 +200,9 @@ auto eval_distances(raft::device_resources const& handle,
       static_cast<uint16_t>(std::min<size_t>(raft::ceildiv<size_t>(k, block_dim.y), 32768));
     dim3 grid_dim(raft::ceildiv<size_t>(n_rows, block_dim.x), grid_y, 1);
 
-    naive_distance_kernel<DistT, T, IdxT><<<grid_dim, block_dim, 0, handle.get_stream()>>>(
-      naive_dist.data_handle(), queries + i * n_cols, y.data_handle(), 1, k, n_cols, metric);
+    naive_distance_kernel<DistT, T, IdxT>
+      <<<grid_dim, block_dim, 0, resource::get_cuda_stream(handle)>>>(
+        naive_dist.data_handle(), queries + i * n_cols, y.data_handle(), 1, k, n_cols, metric);
 
     if (!devArrMatch(distances + i * k,
                      naive_dist.data_handle(),
diff --git a/cpp/test/neighbors/ball_cover.cu b/cpp/test/neighbors/ball_cover.cu
index 19935154df..338fb8cd82 100644
--- a/cpp/test/neighbors/ball_cover.cu
+++ b/cpp/test/neighbors/ball_cover.cu
@@ -17,6 +17,8 @@
 #include "../test_utils.cuh"
 #include "spatial_data.h"
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/ball_cover.cuh>
 #include <raft/neighbors/brute_force.cuh>
@@ -98,7 +100,7 @@ uint32_t count_discrepancies(value_idx* actual_idx,
 }
 
 template <typename value_t>
-void compute_bfknn(const raft::device_resources& handle,
+void compute_bfknn(const raft::resources& handle,
                    const value_t* X1,
                    const value_t* X2,
                    uint32_t n_rows,
@@ -140,34 +142,47 @@ class BallCoverKNNQueryTest : public ::testing::TestWithParam<BallCoverInputs<va
   void basicTest()
   {
     params = ::testing::TestWithParam<BallCoverInputs<value_int>>::GetParam();
-    raft::device_resources handle;
+    raft::resources handle;
 
     uint32_t k         = params.k;
     uint32_t n_centers = 25;
     float weight       = params.weight;
     auto metric        = params.metric;
 
-    rmm::device_uvector<value_t> X(params.n_rows * params.n_cols, handle.get_stream());
-    rmm::device_uvector<uint32_t> Y(params.n_rows, handle.get_stream());
+    rmm::device_uvector<value_t> X(params.n_rows * params.n_cols,
+                                   resource::get_cuda_stream(handle));
+    rmm::device_uvector<uint32_t> Y(params.n_rows, resource::get_cuda_stream(handle));
 
     // Make sure the train and query sets are completely disjoint
-    rmm::device_uvector<value_t> X2(params.n_query * params.n_cols, handle.get_stream());
-    rmm::device_uvector<uint32_t> Y2(params.n_query, handle.get_stream());
-
-    raft::random::make_blobs(
-      X.data(), Y.data(), params.n_rows, params.n_cols, n_centers, handle.get_stream());
-
-    raft::random::make_blobs(
-      X2.data(), Y2.data(), params.n_query, params.n_cols, n_centers, handle.get_stream());
-
-    rmm::device_uvector<value_idx> d_ref_I(params.n_query * k, handle.get_stream());
-    rmm::device_uvector<value_t> d_ref_D(params.n_query * k, handle.get_stream());
+    rmm::device_uvector<value_t> X2(params.n_query * params.n_cols,
+                                    resource::get_cuda_stream(handle));
+    rmm::device_uvector<uint32_t> Y2(params.n_query, resource::get_cuda_stream(handle));
+
+    raft::random::make_blobs(X.data(),
+                             Y.data(),
+                             params.n_rows,
+                             params.n_cols,
+                             n_centers,
+                             resource::get_cuda_stream(handle));
+
+    raft::random::make_blobs(X2.data(),
+                             Y2.data(),
+                             params.n_query,
+                             params.n_cols,
+                             n_centers,
+                             resource::get_cuda_stream(handle));
+
+    rmm::device_uvector<value_idx> d_ref_I(params.n_query * k, resource::get_cuda_stream(handle));
+    rmm::device_uvector<value_t> d_ref_D(params.n_query * k, resource::get_cuda_stream(handle));
 
     if (metric == raft::distance::DistanceType::Haversine) {
       thrust::transform(
-        handle.get_thrust_policy(), X.data(), X.data() + X.size(), X.data(), ToRadians());
-      thrust::transform(
-        handle.get_thrust_policy(), X2.data(), X2.data() + X2.size(), X2.data(), ToRadians());
+        resource::get_thrust_policy(handle), X.data(), X.data() + X.size(), X.data(), ToRadians());
+      thrust::transform(resource::get_thrust_policy(handle),
+                        X2.data(),
+                        X2.data() + X2.size(),
+                        X2.data(),
+                        ToRadians());
     }
 
     compute_bfknn(handle,
@@ -181,11 +196,11 @@ class BallCoverKNNQueryTest : public ::testing::TestWithParam<BallCoverInputs<va
                   d_ref_D.data(),
                   d_ref_I.data());
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
 
     // Allocate predicted arrays
-    rmm::device_uvector<value_idx> d_pred_I(params.n_query * k, handle.get_stream());
-    rmm::device_uvector<value_t> d_pred_D(params.n_query * k, handle.get_stream());
+    rmm::device_uvector<value_idx> d_pred_I(params.n_query * k, resource::get_cuda_stream(handle));
+    rmm::device_uvector<value_t> d_pred_D(params.n_query * k, resource::get_cuda_stream(handle));
 
     auto X_view =
       raft::make_device_matrix_view<value_t, value_int>(X.data(), params.n_rows, params.n_cols);
@@ -202,13 +217,13 @@ class BallCoverKNNQueryTest : public ::testing::TestWithParam<BallCoverInputs<va
     build_index(handle, index);
     knn_query(handle, index, X2_view, d_pred_I_view, d_pred_D_view, k, true);
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
     // What we really want are for the distances to match exactly. The
     // indices may or may not match exactly, depending upon the ordering which
     // can be nondeterministic.
 
-    rmm::device_uvector<uint32_t> discrepancies(params.n_query, handle.get_stream());
-    thrust::fill(handle.get_thrust_policy(),
+    rmm::device_uvector<uint32_t> discrepancies(params.n_query, resource::get_cuda_stream(handle));
+    thrust::fill(resource::get_thrust_policy(handle),
                  discrepancies.data(),
                  discrepancies.data() + discrepancies.size(),
                  0);
@@ -220,7 +235,7 @@ class BallCoverKNNQueryTest : public ::testing::TestWithParam<BallCoverInputs<va
                                   params.n_query,
                                   k,
                                   discrepancies.data(),
-                                  handle.get_stream());
+                                  resource::get_cuda_stream(handle));
 
     ASSERT_TRUE(res == 0);
   }
@@ -240,28 +255,33 @@ class BallCoverAllKNNTest : public ::testing::TestWithParam<BallCoverInputs<valu
   void basicTest()
   {
     params = ::testing::TestWithParam<BallCoverInputs<value_int>>::GetParam();
-    raft::device_resources handle;
+    raft::resources handle;
 
     uint32_t k         = params.k;
     uint32_t n_centers = 25;
     float weight       = params.weight;
     auto metric        = params.metric;
 
-    rmm::device_uvector<value_t> X(params.n_rows * params.n_cols, handle.get_stream());
-    rmm::device_uvector<uint32_t> Y(params.n_rows, handle.get_stream());
+    rmm::device_uvector<value_t> X(params.n_rows * params.n_cols,
+                                   resource::get_cuda_stream(handle));
+    rmm::device_uvector<uint32_t> Y(params.n_rows, resource::get_cuda_stream(handle));
 
-    raft::random::make_blobs(
-      X.data(), Y.data(), params.n_rows, params.n_cols, n_centers, handle.get_stream());
+    raft::random::make_blobs(X.data(),
+                             Y.data(),
+                             params.n_rows,
+                             params.n_cols,
+                             n_centers,
+                             resource::get_cuda_stream(handle));
 
-    rmm::device_uvector<value_idx> d_ref_I(params.n_rows * k, handle.get_stream());
-    rmm::device_uvector<value_t> d_ref_D(params.n_rows * k, handle.get_stream());
+    rmm::device_uvector<value_idx> d_ref_I(params.n_rows * k, resource::get_cuda_stream(handle));
+    rmm::device_uvector<value_t> d_ref_D(params.n_rows * k, resource::get_cuda_stream(handle));
 
     auto X_view = raft::make_device_matrix_view<const value_t, value_int>(
       (const value_t*)X.data(), params.n_rows, params.n_cols);
 
     if (metric == raft::distance::DistanceType::Haversine) {
       thrust::transform(
-        handle.get_thrust_policy(), X.data(), X.data() + X.size(), X.data(), ToRadians());
+        resource::get_thrust_policy(handle), X.data(), X.data() + X.size(), X.data(), ToRadians());
     }
 
     compute_bfknn(handle,
@@ -275,11 +295,11 @@ class BallCoverAllKNNTest : public ::testing::TestWithParam<BallCoverInputs<valu
                   d_ref_D.data(),
                   d_ref_I.data());
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
 
     // Allocate predicted arrays
-    rmm::device_uvector<value_idx> d_pred_I(params.n_rows * k, handle.get_stream());
-    rmm::device_uvector<value_t> d_pred_D(params.n_rows * k, handle.get_stream());
+    rmm::device_uvector<value_idx> d_pred_I(params.n_rows * k, resource::get_cuda_stream(handle));
+    rmm::device_uvector<value_t> d_pred_D(params.n_rows * k, resource::get_cuda_stream(handle));
 
     auto d_pred_I_view =
       raft::make_device_matrix_view<value_idx, value_int>(d_pred_I.data(), params.n_rows, k);
@@ -290,13 +310,13 @@ class BallCoverAllKNNTest : public ::testing::TestWithParam<BallCoverInputs<valu
 
     all_knn_query(handle, index, d_pred_I_view, d_pred_D_view, k, true);
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
     // What we really want are for the distances to match exactly. The
     // indices may or may not match exactly, depending upon the ordering which
     // can be nondeterministic.
 
-    rmm::device_uvector<uint32_t> discrepancies(params.n_rows, handle.get_stream());
-    thrust::fill(handle.get_thrust_policy(),
+    rmm::device_uvector<uint32_t> discrepancies(params.n_rows, resource::get_cuda_stream(handle));
+    thrust::fill(resource::get_thrust_policy(handle),
                  discrepancies.data(),
                  discrepancies.data() + discrepancies.size(),
                  0);
@@ -308,7 +328,7 @@ class BallCoverAllKNNTest : public ::testing::TestWithParam<BallCoverInputs<valu
                                        params.n_rows,
                                        k,
                                        discrepancies.data(),
-                                       handle.get_stream());
+                                       resource::get_cuda_stream(handle));
 
     // TODO: There seem to be discrepancies here only when
     // the entire test suite is executed.
diff --git a/cpp/test/neighbors/epsilon_neighborhood.cu b/cpp/test/neighbors/epsilon_neighborhood.cu
index c78a15dd2d..1601037edb 100644
--- a/cpp/test/neighbors/epsilon_neighborhood.cu
+++ b/cpp/test/neighbors/epsilon_neighborhood.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <memory>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/make_blobs.cuh>
 #include <raft/spatial/knn/epsilon_neighborhood.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -42,16 +43,16 @@ template <typename T, typename IdxT>
 class EpsNeighTest : public ::testing::TestWithParam<EpsInputs<T, IdxT>> {
  protected:
   EpsNeighTest()
-    : data(0, handle.get_stream()),
-      adj(0, handle.get_stream()),
-      labels(0, handle.get_stream()),
-      vd(0, handle.get_stream())
+    : data(0, resource::get_cuda_stream(handle)),
+      adj(0, resource::get_cuda_stream(handle)),
+      labels(0, resource::get_cuda_stream(handle)),
+      vd(0, resource::get_cuda_stream(handle))
   {
   }
 
   void SetUp() override
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     param       = ::testing::TestWithParam<EpsInputs<T, IdxT>>::GetParam();
     data.resize(param.n_row * param.n_col, stream);
     labels.resize(param.n_row, stream);
@@ -72,7 +73,7 @@ class EpsNeighTest : public ::testing::TestWithParam<EpsInputs<T, IdxT>> {
                                 false);
   }
 
-  const raft::device_resources handle;
+  const raft::resources handle;
   EpsInputs<T, IdxT> param;
   cudaStream_t stream = 0;
   rmm::device_uvector<T> data;
diff --git a/cpp/test/neighbors/fused_l2_knn.cu b/cpp/test/neighbors/fused_l2_knn.cu
index 9fbccf681d..fd89dc0fc7 100644
--- a/cpp/test/neighbors/fused_l2_knn.cu
+++ b/cpp/test/neighbors/fused_l2_knn.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include "./knn_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/distance/distance_types.hpp>
@@ -48,7 +49,7 @@ template <typename T>
 class FusedL2KNNTest : public ::testing::TestWithParam<FusedL2KNNInputs> {
  public:
   FusedL2KNNTest()
-    : stream_(handle_.get_stream()),
+    : stream_(resource::get_cuda_stream(handle_)),
       params_(::testing::TestWithParam<FusedL2KNNInputs>::GetParam()),
       database(params_.num_db_vecs * params_.dim, stream_),
       search_queries(params_.num_queries * params_.dim, stream_),
@@ -129,7 +130,7 @@ class FusedL2KNNTest : public ::testing::TestWithParam<FusedL2KNNInputs> {
   }
 
  private:
-  raft::device_resources handle_;
+  raft::resources handle_;
   cudaStream_t stream_ = 0;
   FusedL2KNNInputs params_;
   int num_queries;
diff --git a/cpp/test/neighbors/haversine.cu b/cpp/test/neighbors/haversine.cu
index dc5c8afe18..f1b03a0ab5 100644
--- a/cpp/test/neighbors/haversine.cu
+++ b/cpp/test/neighbors/haversine.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/spatial/knn/detail/haversine_distance.cuh>
 #include <rmm/device_uvector.hpp>
@@ -30,7 +31,7 @@ template <typename value_idx, typename value_t>
 class HaversineKNNTest : public ::testing::Test {
  public:
   HaversineKNNTest()
-    : stream(handle.get_stream()),
+    : stream(resource::get_cuda_stream(handle)),
       d_train_inputs(0, stream),
       d_ref_I(0, stream),
       d_ref_D(0, stream),
@@ -94,13 +95,13 @@ class HaversineKNNTest : public ::testing::Test {
                                               k,
                                               stream);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void SetUp() override { basicTest(); }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   rmm::device_uvector<value_t> d_train_inputs;
diff --git a/cpp/test/neighbors/knn.cu b/cpp/test/neighbors/knn.cu
index a03a761c7e..d187cdd7a5 100644
--- a/cpp/test/neighbors/knn.cu
+++ b/cpp/test/neighbors/knn.cu
@@ -15,6 +15,7 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/logger.hpp>
@@ -62,7 +63,7 @@ class KNNTest : public ::testing::TestWithParam<KNNInputs> {
  public:
   KNNTest()
     : params_(::testing::TestWithParam<KNNInputs>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       actual_labels_(0, stream),
       expected_labels_(0, stream),
       input_(0, stream),
@@ -147,11 +148,11 @@ class KNNTest : public ::testing::TestWithParam<KNNInputs> {
     raft::copy(input_.data(), input_ptr, rows_ * cols_, stream);
     raft::copy(search_data_.data(), input_ptr, rows_ * cols_, stream);
     raft::copy(search_labels_.data(), labels_ptr, rows_, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  private:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   KNNInputs params_;
diff --git a/cpp/test/neighbors/refine.cu b/cpp/test/neighbors/refine.cu
index d868ba06cf..c5865a7618 100644
--- a/cpp/test/neighbors/refine.cu
+++ b/cpp/test/neighbors/refine.cu
@@ -16,11 +16,12 @@
 
 #include "../test_utils.cuh"
 #include "ann_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft_internal/neighbors/refine_helper.cuh>
 
-#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/detail/refine.cuh>
 #include <raft/neighbors/refine.cuh>
@@ -39,7 +40,7 @@ template <typename DataT, typename DistanceT, typename IdxT>
 class RefineTest : public ::testing::TestWithParam<RefineInputs<IdxT>> {
  public:
   RefineTest()
-    : stream_(handle_.get_stream()),
+    : stream_(resource::get_cuda_stream(handle_)),
       data(handle_, ::testing::TestWithParam<RefineInputs<IdxT>>::GetParam())
   {
   }
@@ -83,7 +84,7 @@ class RefineTest : public ::testing::TestWithParam<RefineInputs<IdxT>> {
       update_host(
         indices.data(), data.refined_indices.data_handle(), data.refined_indices.size(), stream_);
     }
-    handle_.sync_stream(stream_);
+    resource::sync_stream(handle_);
 
     double min_recall = 1;
 
@@ -98,7 +99,7 @@ class RefineTest : public ::testing::TestWithParam<RefineInputs<IdxT>> {
   }
 
  public:
-  raft::device_resources handle_;
+  raft::resources handle_;
   rmm::cuda_stream_view stream_;
   RefineHelper<DataT, DistanceT, IdxT> data;
 };
diff --git a/cpp/test/neighbors/selection.cu b/cpp/test/neighbors/selection.cu
index a21ff9f99e..5d63338b45 100644
--- a/cpp/test/neighbors/selection.cu
+++ b/cpp/test/neighbors/selection.cu
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <numeric>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/neighbors/detail/selection_faiss.cuh>
 #include <raft/neighbors/detail/selection_faiss_helpers.cuh>  // kFaissMax
 #include <raft/random/rng.cuh>
@@ -48,11 +49,10 @@ std::ostream& operator<<(std::ostream& os, const SelectTestSpec& ss)
 }
 
 template <typename IdxT>
-auto gen_simple_ids(int n_inputs, int input_len, const raft::device_resources& handle)
-  -> std::vector<IdxT>
+auto gen_simple_ids(int n_inputs, int input_len, const raft::resources& handle) -> std::vector<IdxT>
 {
   std::vector<IdxT> out(n_inputs * input_len);
-  auto s = handle.get_stream();
+  auto s = resource::get_cuda_stream(handle);
   rmm::device_uvector<IdxT> out_d(out.size(), s);
   iota_fill(out_d.data(), IdxT(n_inputs), IdxT(input_len), s);
   update_host(out.data(), out_d.data(), out.size(), s);
@@ -65,7 +65,7 @@ struct SelectInOutSimple {
  public:
   bool not_supported = false;
 
-  SelectInOutSimple(std::shared_ptr<raft::device_resources> handle,
+  SelectInOutSimple(std::shared_ptr<raft::resources> handle,
                     const SelectTestSpec& spec,
                     const std::vector<KeyT>& in_dists,
                     const std::vector<KeyT>& out_dists,
@@ -84,7 +84,7 @@ struct SelectInOutSimple {
   auto get_out_ids() -> std::vector<IdxT>& { return out_ids_; }
 
  private:
-  std::shared_ptr<raft::device_resources> handle_;
+  std::shared_ptr<raft::resources> handle_;
   std::vector<KeyT> in_dists_;
   std::vector<IdxT> in_ids_;
   std::vector<KeyT> out_dists_;
@@ -96,7 +96,7 @@ struct SelectInOutComputed {
  public:
   bool not_supported = false;
 
-  SelectInOutComputed(std::shared_ptr<raft::device_resources> handle,
+  SelectInOutComputed(std::shared_ptr<raft::resources> handle,
                       const SelectTestSpec& spec,
                       knn::SelectKAlgo algo,
                       const std::vector<KeyT>& in_dists,
@@ -125,7 +125,7 @@ struct SelectInOutComputed {
       default: break;
     }
 
-    auto stream = handle_.get()->get_stream();
+    auto stream = resource::get_cuda_stream(*handle_);
 
     rmm::device_uvector<KeyT> in_dists_d(in_dists_.size(), stream);
     rmm::device_uvector<IdxT> in_ids_d(in_ids_.size(), stream);
@@ -162,7 +162,7 @@ struct SelectInOutComputed {
   auto get_out_ids() -> std::vector<IdxT>& { return out_ids_; }
 
  private:
-  std::shared_ptr<raft::device_resources> handle_;
+  std::shared_ptr<raft::resources> handle_;
   std::vector<KeyT> in_dists_;
   std::vector<IdxT> in_ids_;
   std::vector<KeyT> out_dists_;
@@ -213,12 +213,12 @@ struct SelectInOutComputed {
 
 template <typename InOut>
 using Params =
-  std::tuple<SelectTestSpec, knn::SelectKAlgo, InOut, std::shared_ptr<raft::device_resources>>;
+  std::tuple<SelectTestSpec, knn::SelectKAlgo, InOut, std::shared_ptr<raft::resources>>;
 
 template <typename KeyT, typename IdxT, template <typename, typename> typename ParamsReader>
 class SelectionTest : public testing::TestWithParam<typename ParamsReader<KeyT, IdxT>::ParamsIn> {
  protected:
-  std::shared_ptr<raft::device_resources> handle_;
+  std::shared_ptr<raft::resources> handle_;
   const SelectTestSpec spec;
   const knn::SelectKAlgo algo;
 
@@ -276,7 +276,7 @@ struct params_simple {
   using InOut = SelectInOutSimple<KeyT, IdxT>;
   using Inputs =
     std::tuple<SelectTestSpec, std::vector<KeyT>, std::vector<KeyT>, std::vector<IdxT>>;
-  using Handle   = std::shared_ptr<raft::device_resources>;
+  using Handle   = std::shared_ptr<raft::resources>;
   using ParamsIn = std::tuple<Inputs, knn::SelectKAlgo, Handle>;
 
   static auto read(ParamsIn ps) -> Params<InOut>
@@ -353,22 +353,21 @@ auto inputs_simple_f = testing::Values(
 
 typedef SelectionTest<float, int, params_simple> SimpleFloatInt;
 TEST_P(SimpleFloatInt, Run) { run(); }
-INSTANTIATE_TEST_CASE_P(
-  SelectionTest,
-  SimpleFloatInt,
-  testing::Combine(inputs_simple_f,
-                   testing::Values(knn::SelectKAlgo::FAISS,
-                                   knn::SelectKAlgo::RADIX_8_BITS,
-                                   knn::SelectKAlgo::RADIX_11_BITS,
-                                   knn::SelectKAlgo::WARP_SORT),
-                   testing::Values(std::make_shared<raft::device_resources>())));
+INSTANTIATE_TEST_CASE_P(SelectionTest,
+                        SimpleFloatInt,
+                        testing::Combine(inputs_simple_f,
+                                         testing::Values(knn::SelectKAlgo::FAISS,
+                                                         knn::SelectKAlgo::RADIX_8_BITS,
+                                                         knn::SelectKAlgo::RADIX_11_BITS,
+                                                         knn::SelectKAlgo::WARP_SORT),
+                                         testing::Values(std::make_shared<raft::resources>())));
 
 template <knn::SelectKAlgo RefAlgo>
 struct with_ref {
   template <typename KeyT, typename IdxT>
   struct params_random {
     using InOut    = SelectInOutComputed<KeyT, IdxT>;
-    using Handle   = std::shared_ptr<raft::device_resources>;
+    using Handle   = std::shared_ptr<raft::resources>;
     using ParamsIn = std::tuple<SelectTestSpec, knn::SelectKAlgo, Handle>;
 
     static auto read(ParamsIn ps) -> Params<InOut>
@@ -380,7 +379,7 @@ struct with_ref {
       std::vector<KeyT> dists(spec.input_len * spec.n_inputs);
 
       {
-        auto s = (*handle.get()).get_stream();
+        auto s = resource::get_cuda_stream(*handle);
         rmm::device_uvector<KeyT> dists_d(spec.input_len * spec.n_inputs, s);
         raft::random::RngState r(42);
         normal(*(handle.get()), r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0));
@@ -450,36 +449,33 @@ auto inputs_random_largek = testing::Values(SelectTestSpec{100, 100000, 1000, tr
 typedef SelectionTest<float, int, with_ref<knn::SelectKAlgo::FAISS>::params_random>
   ReferencedRandomFloatInt;
 TEST_P(ReferencedRandomFloatInt, Run) { run(); }
-INSTANTIATE_TEST_CASE_P(
-  SelectionTest,
-  ReferencedRandomFloatInt,
-  testing::Combine(inputs_random_longlist,
-                   testing::Values(knn::SelectKAlgo::RADIX_8_BITS,
-                                   knn::SelectKAlgo::RADIX_11_BITS,
-                                   knn::SelectKAlgo::WARP_SORT),
-                   testing::Values(std::make_shared<raft::device_resources>())));
+INSTANTIATE_TEST_CASE_P(SelectionTest,
+                        ReferencedRandomFloatInt,
+                        testing::Combine(inputs_random_longlist,
+                                         testing::Values(knn::SelectKAlgo::RADIX_8_BITS,
+                                                         knn::SelectKAlgo::RADIX_11_BITS,
+                                                         knn::SelectKAlgo::WARP_SORT),
+                                         testing::Values(std::make_shared<raft::resources>())));
 
 typedef SelectionTest<double, size_t, with_ref<knn::SelectKAlgo::FAISS>::params_random>
   ReferencedRandomDoubleSizeT;
 TEST_P(ReferencedRandomDoubleSizeT, Run) { run(); }
-INSTANTIATE_TEST_CASE_P(
-  SelectionTest,
-  ReferencedRandomDoubleSizeT,
-  testing::Combine(inputs_random_longlist,
-                   testing::Values(knn::SelectKAlgo::RADIX_8_BITS,
-                                   knn::SelectKAlgo::RADIX_11_BITS,
-                                   knn::SelectKAlgo::WARP_SORT),
-                   testing::Values(std::make_shared<raft::device_resources>())));
+INSTANTIATE_TEST_CASE_P(SelectionTest,
+                        ReferencedRandomDoubleSizeT,
+                        testing::Combine(inputs_random_longlist,
+                                         testing::Values(knn::SelectKAlgo::RADIX_8_BITS,
+                                                         knn::SelectKAlgo::RADIX_11_BITS,
+                                                         knn::SelectKAlgo::WARP_SORT),
+                                         testing::Values(std::make_shared<raft::resources>())));
 
 typedef SelectionTest<double, int, with_ref<knn::SelectKAlgo::FAISS>::params_random>
   ReferencedRandomDoubleInt;
 TEST_P(ReferencedRandomDoubleInt, LargeSize) { run(); }
-INSTANTIATE_TEST_CASE_P(
-  SelectionTest,
-  ReferencedRandomDoubleInt,
-  testing::Combine(inputs_random_largesize,
-                   testing::Values(knn::SelectKAlgo::WARP_SORT),
-                   testing::Values(std::make_shared<raft::device_resources>())));
+INSTANTIATE_TEST_CASE_P(SelectionTest,
+                        ReferencedRandomDoubleInt,
+                        testing::Combine(inputs_random_largesize,
+                                         testing::Values(knn::SelectKAlgo::WARP_SORT),
+                                         testing::Values(std::make_shared<raft::resources>())));
 
 /** TODO: Fix test failure in RAFT CI
  *
@@ -490,10 +486,9 @@ INSTANTIATE_TEST_CASE_P(
 typedef SelectionTest<float, size_t, with_ref<knn::SelectKAlgo::RADIX_11_BITS>::params_random>
   ReferencedRandomFloatSizeT;
 TEST_P(ReferencedRandomFloatSizeT, LargeK) { run(); }
-INSTANTIATE_TEST_CASE_P(
-  SelectionTest,
-  ReferencedRandomFloatSizeT,
-  testing::Combine(inputs_random_largek,
-                   testing::Values(knn::SelectKAlgo::FAISS),
-                   testing::Values(std::make_shared<raft::device_resources>())));
+INSTANTIATE_TEST_CASE_P(SelectionTest,
+                        ReferencedRandomFloatSizeT,
+                        testing::Combine(inputs_random_largek,
+                                         testing::Values(knn::SelectKAlgo::FAISS),
+                                         testing::Values(std::make_shared<raft::resources>())));
 }  // namespace raft::spatial::selection
diff --git a/cpp/test/neighbors/tiled_knn.cu b/cpp/test/neighbors/tiled_knn.cu
index aa46fc29f1..e7c41cbd93 100644
--- a/cpp/test/neighbors/tiled_knn.cu
+++ b/cpp/test/neighbors/tiled_knn.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include "./ann_utils.cuh"
 #include "./knn_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/logger.hpp>
@@ -60,7 +61,7 @@ template <typename T>
 class TiledKNNTest : public ::testing::TestWithParam<TiledKNNInputs> {
  public:
   TiledKNNTest()
-    : stream_(handle_.get_stream()),
+    : stream_(resource::get_cuda_stream(handle_)),
       params_(::testing::TestWithParam<TiledKNNInputs>::GetParam()),
       database(params_.num_db_vecs * params_.dim, stream_),
       search_queries(params_.num_queries * params_.dim, stream_),
@@ -198,7 +199,7 @@ class TiledKNNTest : public ::testing::TestWithParam<TiledKNNInputs> {
   }
 
  private:
-  raft::device_resources handle_;
+  raft::resources handle_;
   cudaStream_t stream_ = 0;
   TiledKNNInputs params_;
   int num_queries;
diff --git a/cpp/test/random/make_blobs.cu b/cpp/test/random/make_blobs.cu
index 0565635e3b..2fb7fdf142 100644
--- a/cpp/test/random/make_blobs.cu
+++ b/cpp/test/random/make_blobs.cu
@@ -18,7 +18,8 @@
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
 #include <raft/core/device_mdarray.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 
 #include <raft/random/make_blobs.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -81,7 +82,7 @@ class MakeBlobsTest : public ::testing::TestWithParam<MakeBlobsInputs<T>> {
  public:
   MakeBlobsTest()
     : params(::testing::TestWithParam<MakeBlobsInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       mu_vec(make_device_matrix<T, int, layout>(handle, params.n_clusters, params.cols)),
       mean_var(make_device_vector<T, int>(handle, 2 * params.n_clusters * params.cols))
   {
@@ -149,7 +150,7 @@ class MakeBlobsTest : public ::testing::TestWithParam<MakeBlobsInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   MakeBlobsInputs<T> params;
   cudaStream_t stream = 0;
 
diff --git a/cpp/test/random/make_regression.cu b/cpp/test/random/make_regression.cu
index 74aa00171b..0df3b2e7b0 100644
--- a/cpp/test/random/make_regression.cu
+++ b/cpp/test/random/make_regression.cu
@@ -15,12 +15,14 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <thrust/count.h>
 #include <thrust/device_ptr.h>
 #include <thrust/device_vector.h>
 
 #include "../test_utils.cuh"
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
 #include <raft/linalg/subtract.cuh>
 
@@ -46,7 +48,7 @@ class MakeRegressionTest : public ::testing::TestWithParam<MakeRegressionInputs<
  public:
   MakeRegressionTest()
     : params(::testing::TestWithParam<MakeRegressionInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       values_ret(params.n_samples * params.n_targets, stream),
       values_prod(params.n_samples * params.n_targets, stream)
   {
@@ -86,7 +88,7 @@ class MakeRegressionTest : public ::testing::TestWithParam<MakeRegressionInputs<
 
     // Calculate the values from the data and coefficients (column-major)
     T alpha = (T)1.0, beta = (T)0.0;
-    RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(),
+    RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(resource::get_cublas_handle(handle),
                                                      CUBLAS_OP_T,
                                                      CUBLAS_OP_T,
                                                      params.n_samples,
@@ -119,7 +121,7 @@ class MakeRegressionTest : public ::testing::TestWithParam<MakeRegressionInputs<
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
 
   MakeRegressionInputs<T> params;
@@ -181,7 +183,7 @@ class MakeRegressionMdspanTest : public ::testing::TestWithParam<MakeRegressionI
  protected:
   void SetUp() override
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
 
     // Noise must be zero to compare the actual and expected values
     T noise = (T)0.0, tail_strength = (T)0.5;
@@ -218,7 +220,7 @@ class MakeRegressionMdspanTest : public ::testing::TestWithParam<MakeRegressionI
     // Calculate the values from the data and coefficients (column-major)
     T alpha{};
     T beta{};
-    RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(),
+    RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(resource::get_cublas_handle(handle),
                                                      CUBLAS_OP_T,
                                                      CUBLAS_OP_T,
                                                      params.n_samples,
@@ -253,9 +255,11 @@ class MakeRegressionMdspanTest : public ::testing::TestWithParam<MakeRegressionI
 
  private:
   MakeRegressionInputs<T> params{::testing::TestWithParam<MakeRegressionInputs<T>>::GetParam()};
-  raft::device_resources handle;
-  rmm::device_uvector<T> values_ret{params.n_samples * params.n_targets, handle.get_stream()};
-  rmm::device_uvector<T> values_prod{params.n_samples * params.n_targets, handle.get_stream()};
+  raft::resources handle;
+  rmm::device_uvector<T> values_ret{params.n_samples * params.n_targets,
+                                    resource::get_cuda_stream(handle)};
+  rmm::device_uvector<T> values_prod{params.n_samples * params.n_targets,
+                                     resource::get_cuda_stream(handle)};
   int zero_count = -1;
 };
 
@@ -271,7 +275,7 @@ TEST_P(MakeRegressionMdspanTestF, Result)
                           params.n_samples,
                           params.n_targets,
                           raft::CompareApprox<float>(params.tolerance),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 INSTANTIATE_TEST_CASE_P(MakeRegressionMdspanTests,
                         MakeRegressionMdspanTestF,
@@ -289,7 +293,7 @@ TEST_P(MakeRegressionMdspanTestD, Result)
                           params.n_samples,
                           params.n_targets,
                           raft::CompareApprox<double>(params.tolerance),
-                          handle.get_stream()));
+                          resource::get_cuda_stream(handle)));
 }
 INSTANTIATE_TEST_CASE_P(MakeRegressionMdspanTests,
                         MakeRegressionMdspanTestD,
diff --git a/cpp/test/random/multi_variable_gaussian.cu b/cpp/test/random/multi_variable_gaussian.cu
index a27dffc7bf..e35d49e453 100644
--- a/cpp/test/random/multi_variable_gaussian.cu
+++ b/cpp/test/random/multi_variable_gaussian.cu
@@ -18,7 +18,10 @@
 #include <cmath>
 #include <gtest/gtest.h>
 #include <iostream>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusolver_dn_handle.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/multi_variable_gaussian.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -84,12 +87,12 @@ class MVGTest : public ::testing::TestWithParam<MVGInputs<T>> {
  public:
   MVGTest()
     : params(::testing::TestWithParam<MVGInputs<T>>::GetParam()),
-      workspace_d(0, handle.get_stream()),
-      P_d(0, handle.get_stream()),
-      x_d(0, handle.get_stream()),
-      X_d(0, handle.get_stream()),
-      Rand_cov(0, handle.get_stream()),
-      Rand_mean(0, handle.get_stream())
+      workspace_d(0, resource::get_cuda_stream(handle)),
+      P_d(0, resource::get_cuda_stream(handle)),
+      x_d(0, resource::get_cuda_stream(handle)),
+      X_d(0, resource::get_cuda_stream(handle)),
+      Rand_cov(0, resource::get_cuda_stream(handle)),
+      Rand_mean(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -104,9 +107,9 @@ class MVGTest : public ::testing::TestWithParam<MVGInputs<T>> {
     corr      = params.corr;
     tolerance = params.tolerance;
 
-    auto cublasH   = handle.get_cublas_handle();
-    auto cusolverH = handle.get_cusolver_dn_handle();
-    auto stream    = handle.get_stream();
+    auto cublasH   = resource::get_cublas_handle(handle);
+    auto cusolverH = resource::get_cusolver_dn_handle(handle);
+    auto stream    = resource::get_cuda_stream(handle);
 
     // preparing to store stuff
     P.resize(dim * dim);
@@ -199,7 +202,7 @@ class MVGTest : public ::testing::TestWithParam<MVGInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   MVGInputs<T> params;
   rmm::device_uvector<T> workspace_d, P_d, x_d, X_d, Rand_cov, Rand_mean;
   std::vector<T> P, x, X;
@@ -226,12 +229,12 @@ class MVGMdspanTest : public ::testing::TestWithParam<MVGInputs<T>> {
 
  public:
   MVGMdspanTest()
-    : workspace_d(0, handle.get_stream()),
-      P_d(0, handle.get_stream()),
-      x_d(0, handle.get_stream()),
-      X_d(0, handle.get_stream()),
-      Rand_cov(0, handle.get_stream()),
-      Rand_mean(0, handle.get_stream())
+    : workspace_d(0, resource::get_cuda_stream(handle)),
+      P_d(0, resource::get_cuda_stream(handle)),
+      x_d(0, resource::get_cuda_stream(handle)),
+      X_d(0, resource::get_cuda_stream(handle)),
+      Rand_cov(0, resource::get_cuda_stream(handle)),
+      Rand_mean(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -244,9 +247,9 @@ class MVGMdspanTest : public ::testing::TestWithParam<MVGInputs<T>> {
     corr        = params.corr;
     tolerance   = params.tolerance;
 
-    auto cublasH   = handle.get_cublas_handle();
-    auto cusolverH = handle.get_cusolver_dn_handle();
-    auto stream    = handle.get_stream();
+    auto cublasH   = resource::get_cublas_handle(handle);
+    auto cusolverH = resource::get_cusolver_dn_handle(handle);
+    auto stream    = resource::get_cuda_stream(handle);
 
     P.resize(dim * dim);
     x.resize(dim);
@@ -327,7 +330,7 @@ class MVGMdspanTest : public ::testing::TestWithParam<MVGInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
 
   MVGInputs<T> params;
   std::vector<T> P, x, X;
@@ -413,8 +416,11 @@ using MVGTestF = MVGTest<float>;
 using MVGTestD = MVGTest<double>;
 TEST_P(MVGTestF, MeanIsCorrectF)
 {
-  EXPECT_TRUE(raft::devArrMatch(
-    x_d.data(), Rand_mean.data(), dim, raft::CompareApprox<float>(tolerance), handle.get_stream()))
+  EXPECT_TRUE(raft::devArrMatch(x_d.data(),
+                                Rand_mean.data(),
+                                dim,
+                                raft::CompareApprox<float>(tolerance),
+                                resource::get_cuda_stream(handle)))
     << " in MeanIsCorrect";
 }
 TEST_P(MVGTestF, CovIsCorrectF)
@@ -424,13 +430,16 @@ TEST_P(MVGTestF, CovIsCorrectF)
                                 dim,
                                 dim,
                                 raft::CompareApprox<float>(tolerance),
-                                handle.get_stream()))
+                                resource::get_cuda_stream(handle)))
     << " in CovIsCorrect";
 }
 TEST_P(MVGTestD, MeanIsCorrectD)
 {
-  EXPECT_TRUE(raft::devArrMatch(
-    x_d.data(), Rand_mean.data(), dim, raft::CompareApprox<double>(tolerance), handle.get_stream()))
+  EXPECT_TRUE(raft::devArrMatch(x_d.data(),
+                                Rand_mean.data(),
+                                dim,
+                                raft::CompareApprox<double>(tolerance),
+                                resource::get_cuda_stream(handle)))
     << " in MeanIsCorrect";
 }
 TEST_P(MVGTestD, CovIsCorrectD)
@@ -440,7 +449,7 @@ TEST_P(MVGTestD, CovIsCorrectD)
                                 dim,
                                 dim,
                                 raft::CompareApprox<double>(tolerance),
-                                handle.get_stream()))
+                                resource::get_cuda_stream(handle)))
     << " in CovIsCorrect";
 }
 
@@ -448,8 +457,11 @@ using MVGMdspanTestF = MVGMdspanTest<float>;
 using MVGMdspanTestD = MVGMdspanTest<double>;
 TEST_P(MVGMdspanTestF, MeanIsCorrectF)
 {
-  EXPECT_TRUE(raft::devArrMatch(
-    x_d.data(), Rand_mean.data(), dim, raft::CompareApprox<float>(tolerance), handle.get_stream()))
+  EXPECT_TRUE(raft::devArrMatch(x_d.data(),
+                                Rand_mean.data(),
+                                dim,
+                                raft::CompareApprox<float>(tolerance),
+                                resource::get_cuda_stream(handle)))
     << " in MeanIsCorrect";
 }
 TEST_P(MVGMdspanTestF, CovIsCorrectF)
@@ -459,13 +471,16 @@ TEST_P(MVGMdspanTestF, CovIsCorrectF)
                                 dim,
                                 dim,
                                 raft::CompareApprox<float>(tolerance),
-                                handle.get_stream()))
+                                resource::get_cuda_stream(handle)))
     << " in CovIsCorrect";
 }
 TEST_P(MVGMdspanTestD, MeanIsCorrectD)
 {
-  EXPECT_TRUE(raft::devArrMatch(
-    x_d.data(), Rand_mean.data(), dim, raft::CompareApprox<double>(tolerance), handle.get_stream()))
+  EXPECT_TRUE(raft::devArrMatch(x_d.data(),
+                                Rand_mean.data(),
+                                dim,
+                                raft::CompareApprox<double>(tolerance),
+                                resource::get_cuda_stream(handle)))
     << " in MeanIsCorrect";
 }
 TEST_P(MVGMdspanTestD, CovIsCorrectD)
@@ -475,7 +490,7 @@ TEST_P(MVGMdspanTestD, CovIsCorrectD)
                                 dim,
                                 dim,
                                 raft::CompareApprox<double>(tolerance),
-                                handle.get_stream()))
+                                resource::get_cuda_stream(handle)))
     << " in CovIsCorrect";
 }
 
diff --git a/cpp/test/random/permute.cu b/cpp/test/random/permute.cu
index 2c5ddf9d5a..f95c8c71c0 100644
--- a/cpp/test/random/permute.cu
+++ b/cpp/test/random/permute.cu
@@ -16,7 +16,8 @@
 
 #include "../test_utils.cuh"
 #include <algorithm>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/permute.cuh>
 #include <raft/random/rng.cuh>
 
@@ -47,13 +48,15 @@ class PermTest : public ::testing::TestWithParam<PermInputs<T>> {
 
  protected:
   PermTest()
-    : in(0, handle.get_stream()), out(0, handle.get_stream()), outPerms(0, handle.get_stream())
+    : in(0, resource::get_cuda_stream(handle)),
+      out(0, resource::get_cuda_stream(handle)),
+      outPerms(0, resource::get_cuda_stream(handle))
   {
   }
 
   void SetUp() override
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     params      = ::testing::TestWithParam<PermInputs<T>>::GetParam();
     // forcefully set needPerms, since we need it for unit-testing!
     if (params.needShuffle) { params.needPerms = true; }
@@ -73,11 +76,11 @@ class PermTest : public ::testing::TestWithParam<PermInputs<T>> {
       uniform(handle, r, in_ptr, len, T(-1.0), T(1.0));
     }
     permute(outPerms_ptr, out_ptr, in_ptr, D, N, params.rowMajor, stream);
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   PermInputs<T> params;
   rmm::device_uvector<T> in, out;
   T* in_ptr  = nullptr;
@@ -93,7 +96,9 @@ class PermMdspanTest : public ::testing::TestWithParam<PermInputs<T>> {
 
  protected:
   PermMdspanTest()
-    : in(0, handle.get_stream()), out(0, handle.get_stream()), outPerms(0, handle.get_stream())
+    : in(0, resource::get_cuda_stream(handle)),
+      out(0, resource::get_cuda_stream(handle)),
+      outPerms(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -109,7 +114,7 @@ class PermMdspanTest : public ::testing::TestWithParam<PermInputs<T>> {
  protected:
   void SetUp() override
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     params      = ::testing::TestWithParam<PermInputs<T>>::GetParam();
     // forcefully set needPerms, since we need it for unit-testing!
     if (params.needShuffle) { params.needPerms = true; }
@@ -156,11 +161,11 @@ class PermMdspanTest : public ::testing::TestWithParam<PermInputs<T>> {
       set_up_views_and_test(raft::col_major{});
     }
 
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   PermInputs<T> params;
   rmm::device_uvector<T> in, out;
   T* in_ptr  = nullptr;
diff --git a/cpp/test/random/rmat_rectangular_generator.cu b/cpp/test/random/rmat_rectangular_generator.cu
index fd9a8ec732..1af3d2be31 100644
--- a/cpp/test/random/rmat_rectangular_generator.cu
+++ b/cpp/test/random/rmat_rectangular_generator.cu
@@ -16,12 +16,13 @@
 
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <sys/timeb.h>
 #include <vector>
 
 #include "../test_utils.cuh"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rmat_rectangular_generator.cuh>
 #include <raft/random/rng.cuh>
 
@@ -166,7 +167,7 @@ class RmatGenTest : public ::testing::TestWithParam<RmatInputs> {
  public:
   RmatGenTest()
     : handle{},
-      stream{handle.get_stream()},
+      stream{resource::get_cuda_stream(handle)},
       params{::testing::TestWithParam<RmatInputs>::GetParam()},
       out{params.n_edges * 2, stream},
       out_src{params.n_edges, stream},
@@ -244,7 +245,7 @@ class RmatGenTest : public ::testing::TestWithParam<RmatInputs> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RmatInputs params;
@@ -259,7 +260,7 @@ class RmatGenMdspanTest : public ::testing::TestWithParam<RmatInputs> {
  public:
   RmatGenMdspanTest()
     : handle{},
-      stream{handle.get_stream()},
+      stream{resource::get_cuda_stream(handle)},
       params{::testing::TestWithParam<RmatInputs>::GetParam()},
       out{params.n_edges * 2, stream},
       out_src{params.n_edges, stream},
@@ -349,7 +350,7 @@ class RmatGenMdspanTest : public ::testing::TestWithParam<RmatInputs> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RmatInputs params;
diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu
index 92f79b1fa0..d04caf96b1 100644
--- a/cpp/test/random/rng.cu
+++ b/cpp/test/random/rng.cu
@@ -15,12 +15,13 @@
  */
 
 #include <memory>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <sys/timeb.h>
 
 #include "../test_utils.cuh"
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/mean.cuh>
 #include <raft/stats/stddev.cuh>
@@ -97,7 +98,7 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
  public:
   RngTest()
     : params(::testing::TestWithParam<RngInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(0, stream),
       stats(2, stream)
   {
@@ -182,7 +183,7 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RngInputs<T> params;
@@ -195,7 +196,7 @@ class RngMdspanTest : public ::testing::TestWithParam<RngInputs<T>> {
  public:
   RngMdspanTest()
     : params(::testing::TestWithParam<RngInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(0, stream),
       stats(2, stream)
   {
@@ -276,7 +277,7 @@ class RngMdspanTest : public ::testing::TestWithParam<RngInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RngInputs<T> params;
@@ -393,8 +394,8 @@ TEST(Rng, MeanError)
   int num_experiments = 1024;
   int len             = num_samples * num_experiments;
 
-  raft::device_resources handle;
-  auto stream = handle.get_stream();
+  raft::resources handle;
+  auto stream = resource::get_cuda_stream(handle);
 
   rmm::device_uvector<float> data(len, stream);
   rmm::device_uvector<float> mean_result(num_experiments, stream);
@@ -442,7 +443,7 @@ TEST(Rng, MeanError)
 template <typename T, int len, int scale>
 class ScaledBernoulliTest : public ::testing::Test {
  public:
-  ScaledBernoulliTest() : stream(handle.get_stream()), data(len, stream) {}
+  ScaledBernoulliTest() : stream(resource::get_cuda_stream(handle)), data(len, stream) {}
 
  protected:
   void SetUp() override
@@ -460,7 +461,7 @@ class ScaledBernoulliTest : public ::testing::Test {
       h_data.get(), h_data.get() + len, [](const T& a) { return a < -scale || a > scale; }));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   rmm::device_uvector<T> data;
@@ -469,7 +470,7 @@ class ScaledBernoulliTest : public ::testing::Test {
 template <typename T, int len, int scale>
 class ScaledBernoulliMdspanTest : public ::testing::Test {
  public:
-  ScaledBernoulliMdspanTest() : stream(handle.get_stream()), data(len, stream) {}
+  ScaledBernoulliMdspanTest() : stream(resource::get_cuda_stream(handle)), data(len, stream) {}
 
  protected:
   void SetUp() override
@@ -489,7 +490,7 @@ class ScaledBernoulliMdspanTest : public ::testing::Test {
       h_data.get(), h_data.get() + len, [](const T& a) { return a < -scale || a > scale; }));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   rmm::device_uvector<T> data;
@@ -510,7 +511,7 @@ TEST_F(ScaledBernoulliMdspanTest2, RangeCheck) { rangeCheck(); }
 template <typename T, int len>
 class BernoulliTest : public ::testing::Test {
  public:
-  BernoulliTest() : stream(handle.get_stream()), data(len, stream) {}
+  BernoulliTest() : stream(resource::get_cuda_stream(handle)), data(len, stream) {}
 
  protected:
   void SetUp() override
@@ -530,7 +531,7 @@ class BernoulliTest : public ::testing::Test {
     delete[] h_data;
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   rmm::device_uvector<bool> data;
@@ -539,7 +540,7 @@ class BernoulliTest : public ::testing::Test {
 template <typename T, int len>
 class BernoulliMdspanTest : public ::testing::Test {
  public:
-  BernoulliMdspanTest() : stream(handle.get_stream()), data(len, stream) {}
+  BernoulliMdspanTest() : stream(resource::get_cuda_stream(handle)), data(len, stream) {}
 
  protected:
   void SetUp() override
@@ -561,7 +562,7 @@ class BernoulliMdspanTest : public ::testing::Test {
     ASSERT_TRUE(std::any_of(h_data.get(), h_data.get() + len, [](bool a) { return !a; }));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   rmm::device_uvector<bool> data;
@@ -600,7 +601,7 @@ class RngNormalTableTest : public ::testing::TestWithParam<RngNormalTableInputs<
  public:
   RngNormalTableTest()
     : params(::testing::TestWithParam<RngNormalTableInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       stats(2, stream),
       mu_vec(params.cols, stream)
@@ -637,7 +638,7 @@ class RngNormalTableTest : public ::testing::TestWithParam<RngNormalTableInputs<
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RngNormalTableInputs<T> params;
@@ -651,7 +652,7 @@ class RngNormalTableMdspanTest : public ::testing::TestWithParam<RngNormalTableI
  public:
   RngNormalTableMdspanTest()
     : params(::testing::TestWithParam<RngNormalTableInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       stats(2, stream),
       mu_vec(params.cols, stream)
@@ -693,7 +694,7 @@ class RngNormalTableMdspanTest : public ::testing::TestWithParam<RngNormalTableI
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RngNormalTableInputs<T> params;
diff --git a/cpp/test/random/rng_discrete.cu b/cpp/test/random/rng_discrete.cu
index b9b283b87d..799f44735e 100644
--- a/cpp/test/random/rng_discrete.cu
+++ b/cpp/test/random/rng_discrete.cu
@@ -18,7 +18,8 @@
 #include <algorithm>
 #include <cmath>
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/add.cuh>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/random/rng.cuh>
@@ -113,7 +114,7 @@ class RngDiscreteTest : public ::testing::TestWithParam<RngDiscreteInputs<IdxT>>
  public:
   RngDiscreteTest()
     : params(::testing::TestWithParam<RngDiscreteInputs<IdxT>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       out(params.sampled_len, stream),
       weights(params.len, stream),
       histogram(params.len, stream),
@@ -168,7 +169,7 @@ class RngDiscreteTest : public ::testing::TestWithParam<RngDiscreteInputs<IdxT>>
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RngDiscreteInputs<IdxT> params;
diff --git a/cpp/test/random/rng_int.cu b/cpp/test/random/rng_int.cu
index 8208b04489..73c95573ec 100644
--- a/cpp/test/random/rng_int.cu
+++ b/cpp/test/random/rng_int.cu
@@ -17,7 +17,8 @@
 #include "../test_utils.cuh"
 #include <cub/cub.cuh>
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -73,7 +74,7 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
  public:
   RngTest()
     : params(::testing::TestWithParam<RngInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(0, stream),
       stats(2, stream)
   {
@@ -95,10 +96,10 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
     meanKernel<T, threads><<<raft::ceildiv(params.len, threads), threads, 0, stream>>>(
       stats.data(), data.data(), params.len);
     update_host<float>(h_stats, stats.data(), 2, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
     h_stats[0] /= params.len;
     h_stats[1] = (h_stats[1] / params.len) - (h_stats[0] * h_stats[0]);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void getExpectedMeanVar(float meanvar[2])
@@ -113,7 +114,7 @@ class RngTest : public ::testing::TestWithParam<RngInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RngInputs<T> params;
@@ -127,7 +128,7 @@ class RngMdspanTest : public ::testing::TestWithParam<RngInputs<T>> {
  public:
   RngMdspanTest()
     : params(::testing::TestWithParam<RngInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(0, stream),
       stats(2, stream)
   {
@@ -148,10 +149,10 @@ class RngMdspanTest : public ::testing::TestWithParam<RngInputs<T>> {
     meanKernel<T, threads><<<raft::ceildiv(params.len, threads), threads, 0, stream>>>(
       stats.data(), data.data(), params.len);
     update_host<float>(h_stats, stats.data(), 2, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
     h_stats[0] /= params.len;
     h_stats[1] = (h_stats[1] / params.len) - (h_stats[0] * h_stats[0]);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void getExpectedMeanVar(float meanvar[2])
@@ -166,7 +167,7 @@ class RngMdspanTest : public ::testing::TestWithParam<RngInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   RngInputs<T> params;
diff --git a/cpp/test/random/sample_without_replacement.cu b/cpp/test/random/sample_without_replacement.cu
index dcad32ce8a..0d02567ea6 100644
--- a/cpp/test/random/sample_without_replacement.cu
+++ b/cpp/test/random/sample_without_replacement.cu
@@ -16,7 +16,8 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/random/sample_without_replacement.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -52,7 +53,7 @@ class SWoRTest : public ::testing::TestWithParam<SWoRInputs<T>> {
  public:
   SWoRTest()
     : params(::testing::TestWithParam<SWoRInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.len, stream),
       wts(params.len, stream),
       out(params.sampledLen, stream),
@@ -73,11 +74,11 @@ class SWoRTest : public ::testing::TestWithParam<SWoRInputs<T>> {
     sampleWithoutReplacement(
       handle, r, out.data(), outIdx.data(), in.data(), wts.data(), params.sampledLen, params.len);
     update_host(h_outIdx.data(), outIdx.data(), params.sampledLen, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   SWoRInputs<T> params;
@@ -91,7 +92,7 @@ class SWoRMdspanTest : public ::testing::TestWithParam<SWoRInputs<T>> {
  public:
   SWoRMdspanTest()
     : params(::testing::TestWithParam<SWoRInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in(params.len, stream),
       wts(params.len, stream),
       out(params.sampledLen, stream),
@@ -142,11 +143,11 @@ class SWoRMdspanTest : public ::testing::TestWithParam<SWoRInputs<T>> {
       sample_without_replacement(handle, r, in_view, std::nullopt, out2_view, std::nullopt);
     }
     update_host(h_outIdx.data(), outIdx.data(), params.sampledLen, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   SWoRInputs<T> params;
diff --git a/cpp/test/sparse/add.cu b/cpp/test/sparse/add.cu
index eb10432f3d..9f0ddd551d 100644
--- a/cpp/test/sparse/add.cu
+++ b/cpp/test/sparse/add.cu
@@ -15,8 +15,9 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/csr.hpp>
 #include <raft/sparse/linalg/add.cuh>
 
@@ -48,7 +49,7 @@ class CSRAddTest : public ::testing::TestWithParam<CSRAddInputs<Type_f, Index_>>
  public:
   CSRAddTest()
     : params(::testing::TestWithParam<CSRAddInputs<Type_f, Index_>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       ind_a(params.matrix_a.row_ind.size(), stream),
       ind_ptr_a(params.matrix_a.row_ind_ptr.size(), stream),
       values_a(params.matrix_a.row_ind_ptr.size(), stream),
@@ -126,7 +127,7 @@ class CSRAddTest : public ::testing::TestWithParam<CSRAddInputs<Type_f, Index_>>
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   CSRAddInputs<Type_f, Index_> params;
diff --git a/cpp/test/sparse/convert_coo.cu b/cpp/test/sparse/convert_coo.cu
index ad91d0d284..f05b6dfb08 100644
--- a/cpp/test/sparse/convert_coo.cu
+++ b/cpp/test/sparse/convert_coo.cu
@@ -15,8 +15,9 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/convert/coo.cuh>
 #include <raft/sparse/csr.hpp>
 
@@ -41,7 +42,7 @@ class CSRtoCOOTest : public ::testing::TestWithParam<CSRtoCOOInputs<Index_>> {
  public:
   CSRtoCOOTest()
     : params(::testing::TestWithParam<CSRtoCOOInputs<Index_>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       ex_scan(params.ex_scan.size(), stream),
       verify(params.verify.size(), stream),
       result(params.verify.size(), stream)
@@ -66,7 +67,7 @@ class CSRtoCOOTest : public ::testing::TestWithParam<CSRtoCOOInputs<Index_>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   CSRtoCOOInputs<Index_> params;
diff --git a/cpp/test/sparse/convert_csr.cu b/cpp/test/sparse/convert_csr.cu
index 71d296f665..e312f21b4d 100644
--- a/cpp/test/sparse/convert_csr.cu
+++ b/cpp/test/sparse/convert_csr.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cuda_utils.cuh>
 
 #include <raft/sparse/convert/csr.cuh>
@@ -123,7 +124,7 @@ template <typename index_t>
 class CSRAdjGraphTest : public ::testing::TestWithParam<CSRAdjGraphInputs<index_t>> {
  public:
   CSRAdjGraphTest()
-    : stream(handle.get_stream()),
+    : stream(resource::get_cuda_stream(handle)),
       params(::testing::TestWithParam<CSRAdjGraphInputs<index_t>>::GetParam()),
       adj(params.n_rows * params.n_cols, stream),
       row_ind(params.n_rows, stream),
@@ -181,7 +182,7 @@ class CSRAdjGraphTest : public ::testing::TestWithParam<CSRAdjGraphInputs<index_
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   CSRAdjGraphInputs<index_t> params;
diff --git a/cpp/test/sparse/csr_row_slice.cu b/cpp/test/sparse/csr_row_slice.cu
index 73b8691774..1438498628 100644
--- a/cpp/test/sparse/csr_row_slice.cu
+++ b/cpp/test/sparse/csr_row_slice.cu
@@ -15,7 +15,8 @@
  */
 
 #include <cusparse_v2.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 
 #include <gtest/gtest.h>
@@ -57,7 +58,7 @@ class CSRRowSliceTest : public ::testing::TestWithParam<CSRRowSliceInputs<value_
  public:
   CSRRowSliceTest()
     : params(::testing::TestWithParam<CSRRowSliceInputs<value_idx, value_t>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       indptr(0, stream),
       indices(0, stream),
       data(0, stream),
@@ -98,7 +99,7 @@ class CSRRowSliceTest : public ::testing::TestWithParam<CSRRowSliceInputs<value_
     update_device(
       out_indices_ref.data(), out_indices_ref_h.data(), out_indices_ref_h.size(), stream);
     update_device(out_data_ref.data(), out_data_ref_h.data(), out_data_ref_h.size(), stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void SetUp() override
@@ -124,7 +125,7 @@ class CSRRowSliceTest : public ::testing::TestWithParam<CSRRowSliceInputs<value_
                                              out_data.data(),
                                              stream);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void compare()
@@ -142,7 +143,7 @@ class CSRRowSliceTest : public ::testing::TestWithParam<CSRRowSliceInputs<value_
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   // input data
diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/test/sparse/csr_to_dense.cu
index 39a6cc4164..47168c91ef 100644
--- a/cpp/test/sparse/csr_to_dense.cu
+++ b/cpp/test/sparse/csr_to_dense.cu
@@ -15,7 +15,8 @@
  */
 
 #include <cusparse_v2.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 
 #include <gtest/gtest.h>
@@ -56,7 +57,7 @@ class CSRToDenseTest : public ::testing::TestWithParam<CSRToDenseInputs<value_id
  public:
   CSRToDenseTest()
     : params(::testing::TestWithParam<CSRToDenseInputs<value_idx, value_t>>::GetParam()),
-      stream(raft_handle.get_stream()),
+      stream(resource::get_cuda_stream(raft_handle)),
       indptr(0, stream),
       indices(0, stream),
       data(0, stream),
@@ -116,7 +117,7 @@ class CSRToDenseTest : public ::testing::TestWithParam<CSRToDenseInputs<value_id
   }
 
  protected:
-  raft::device_resources raft_handle;
+  raft::resources raft_handle;
   cudaStream_t stream;
 
   cusparseHandle_t handle;
diff --git a/cpp/test/sparse/csr_transpose.cu b/cpp/test/sparse/csr_transpose.cu
index 812c3defea..2379fa2de9 100644
--- a/cpp/test/sparse/csr_transpose.cu
+++ b/cpp/test/sparse/csr_transpose.cu
@@ -15,10 +15,11 @@
  */
 
 #include <cusparse_v2.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <gtest/gtest.h>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/sparse/linalg/transpose.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -56,7 +57,7 @@ class CSRTransposeTest : public ::testing::TestWithParam<CSRTransposeInputs<valu
  public:
   CSRTransposeTest()
     : params(::testing::TestWithParam<CSRTransposeInputs<value_idx, value_t>>::GetParam()),
-      stream(raft_handle.get_stream()),
+      stream(resource::get_cuda_stream(raft_handle)),
       indptr(0, stream),
       indices(0, stream),
       data(0, stream),
@@ -101,7 +102,7 @@ class CSRTransposeTest : public ::testing::TestWithParam<CSRTransposeInputs<valu
 
   void SetUp() override
   {
-    raft::device_resources handle;
+    raft::resources handle;
 
     make_data();
 
@@ -117,7 +118,7 @@ class CSRTransposeTest : public ::testing::TestWithParam<CSRTransposeInputs<valu
                                         params.nnz,
                                         stream);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void compare()
@@ -135,7 +136,7 @@ class CSRTransposeTest : public ::testing::TestWithParam<CSRTransposeInputs<valu
   }
 
  protected:
-  raft::device_resources raft_handle;
+  raft::resources raft_handle;
   cudaStream_t stream;
 
   cusparseHandle_t handle;
diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/test/sparse/dist_coo_spmv.cu
index e768e49f6c..2b7e8233a5 100644
--- a/cpp/test/sparse/dist_coo_spmv.cu
+++ b/cpp/test/sparse/dist_coo_spmv.cu
@@ -15,6 +15,7 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/core/operators.cuh>
 #include <raft/core/operators.hpp>
@@ -77,11 +78,11 @@ class SparseDistanceCOOSPMVTest
  public:
   SparseDistanceCOOSPMVTest()
     : dist_config(handle),
-      indptr(0, handle.get_stream()),
-      indices(0, handle.get_stream()),
-      data(0, handle.get_stream()),
-      out_dists(0, handle.get_stream()),
-      out_dists_ref(0, handle.get_stream())
+      indptr(0, resource::get_cuda_stream(handle)),
+      indices(0, resource::get_cuda_stream(handle)),
+      data(0, resource::get_cuda_stream(handle)),
+      out_dists(0, resource::get_cuda_stream(handle)),
+      out_dists_ref(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -101,13 +102,13 @@ class SparseDistanceCOOSPMVTest
   void compute_dist(reduce_f reduce_func, accum_f accum_func, write_f write_func, bool rev = true)
   {
     rmm::device_uvector<value_idx> coo_rows(max(dist_config.b_nnz, dist_config.a_nnz),
-                                            dist_config.handle.get_stream());
+                                            resource::get_cuda_stream(dist_config.handle));
 
     raft::sparse::convert::csr_to_coo(dist_config.b_indptr,
                                       dist_config.b_nrows,
                                       coo_rows.data(),
                                       dist_config.b_nnz,
-                                      dist_config.handle.get_stream());
+                                      resource::get_cuda_stream(dist_config.handle));
 
     strategy_t selected_strategy = make_strategy<strategy_t>();
     detail::balanced_coo_pairwise_generalized_spmv<value_idx, value_t>(out_dists.data(),
@@ -123,7 +124,7 @@ class SparseDistanceCOOSPMVTest
                                         dist_config.a_nrows,
                                         coo_rows.data(),
                                         dist_config.a_nnz,
-                                        dist_config.handle.get_stream());
+                                        resource::get_cuda_stream(dist_config.handle));
 
       detail::balanced_coo_pairwise_generalized_spmv_rev<value_idx, value_t>(out_dists.data(),
                                                                              dist_config,
@@ -167,7 +168,7 @@ class SparseDistanceCOOSPMVTest
                                        out_dists.data(),
                                        dist_config.a_nrows * dist_config.b_nrows,
                                        raft::pow_const_op<value_t>{p},
-                                       dist_config.handle.get_stream());
+                                       resource::get_cuda_stream(dist_config.handle));
 
       } break;
       default: throw raft::exception("Unknown distance");
@@ -181,7 +182,7 @@ class SparseDistanceCOOSPMVTest
     std::vector<value_idx> indices_h = params.input_configuration.indices_h;
     std::vector<value_t> data_h      = params.input_configuration.data_h;
 
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     indptr.resize(indptr_h.size(), stream);
     indices.resize(indices_h.size(), stream);
     data.resize(data_h.size(), stream);
@@ -219,11 +220,11 @@ class SparseDistanceCOOSPMVTest
 
     int out_size = dist_config.a_nrows * dist_config.b_nrows;
 
-    out_dists.resize(out_size, handle.get_stream());
+    out_dists.resize(out_size, resource::get_cuda_stream(handle));
 
     run_spmv();
 
-    RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+    RAFT_CUDA_TRY(cudaStreamSynchronize(resource::get_cuda_stream(handle)));
   }
 
   void compare()
@@ -235,7 +236,7 @@ class SparseDistanceCOOSPMVTest
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
 
   // input data
   rmm::device_uvector<value_idx> indptr, indices;
diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu
index 2a973d675c..debb439345 100644
--- a/cpp/test/sparse/distance.cu
+++ b/cpp/test/sparse/distance.cu
@@ -15,6 +15,7 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <cusparse_v2.h>
 
@@ -61,11 +62,11 @@ class SparseDistanceTest
   SparseDistanceTest()
     : params(::testing::TestWithParam<SparseDistanceInputs<value_idx, value_t>>::GetParam()),
       dist_config(handle),
-      indptr(0, handle.get_stream()),
-      indices(0, handle.get_stream()),
-      data(0, handle.get_stream()),
-      out_dists(0, handle.get_stream()),
-      out_dists_ref(0, handle.get_stream())
+      indptr(0, resource::get_cuda_stream(handle)),
+      indices(0, resource::get_cuda_stream(handle)),
+      data(0, resource::get_cuda_stream(handle)),
+      out_dists(0, resource::get_cuda_stream(handle)),
+      out_dists_ref(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -88,11 +89,11 @@ class SparseDistanceTest
 
     int out_size = dist_config.a_nrows * dist_config.b_nrows;
 
-    out_dists.resize(out_size, handle.get_stream());
+    out_dists.resize(out_size, resource::get_cuda_stream(handle));
 
     pairwiseDistance(out_dists.data(), dist_config, params.metric, params.metric_arg);
 
-    RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+    RAFT_CUDA_TRY(cudaStreamSynchronize(resource::get_cuda_stream(handle)));
   }
 
   void compare()
@@ -110,7 +111,7 @@ class SparseDistanceTest
     std::vector<value_idx> indices_h = params.indices_h;
     std::vector<value_t> data_h      = params.data_h;
 
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     indptr.resize(indptr_h.size(), stream);
     indices.resize(indices_h.size(), stream);
     data.resize(data_h.size(), stream);
@@ -126,10 +127,10 @@ class SparseDistanceTest
     update_device(out_dists_ref.data(),
                   out_dists_ref_h.data(),
                   out_dists_ref_h.size(),
-                  dist_config.handle.get_stream());
+                  resource::get_cuda_stream(dist_config.handle));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
 
   // input data
   rmm::device_uvector<value_idx> indptr, indices;
diff --git a/cpp/test/sparse/filter.cu b/cpp/test/sparse/filter.cu
index 8c106f8868..956bb9c351 100644
--- a/cpp/test/sparse/filter.cu
+++ b/cpp/test/sparse/filter.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cudart_utils.hpp>
 
 #include <raft/random/rng.cuh>
@@ -50,8 +51,8 @@ const std::vector<SparseFilterInputs<float>> inputsf = {{5, 10, 5, 1234ULL}};
 typedef SparseFilterTests<float> COORemoveZeros;
 TEST_P(COORemoveZeros, Result)
 {
-  raft::device_resources h;
-  auto stream = h.get_stream();
+  raft::resources h;
+  auto stream = resource::get_cuda_stream(h);
   params      = ::testing::TestWithParam<SparseFilterInputs<float>>::GetParam();
 
   float* in_h_vals = new float[params.nnz];
diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu
index 86a2e0cf43..87cebd3519 100644
--- a/cpp/test/sparse/gram.cu
+++ b/cpp/test/sparse/gram.cu
@@ -15,6 +15,7 @@
  */
 
 #if defined RAFT_DISTANCE_COMPILED
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/specializations.cuh>
 #endif
 
@@ -170,7 +171,7 @@ class GramMatrixTest : public ::testing::TestWithParam<GramMatrixInputs> {
 
     std::vector<math_t> dense_host(dense_size);
     raft::update_host(dense_host.data(), dense, dense_size, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     std::vector<int> indptr_host(n_rows + 1);
     std::vector<int> indices_host(n_rows * n_cols);
@@ -202,7 +203,7 @@ class GramMatrixTest : public ::testing::TestWithParam<GramMatrixInputs> {
     raft::update_device(indptr, indptr_host.data(), n_rows + 1, stream);
     raft::update_device(indices, indices_host.data(), nnz, stream);
     raft::update_device(data, data_host.data(), nnz, stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     return nnz;
   }
@@ -287,13 +288,13 @@ class GramMatrixTest : public ::testing::TestWithParam<GramMatrixInputs> {
                           stream,
                           handle);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     ASSERT_TRUE(raft::devArrMatchHost(
       gram_host.data(), gram.data(), gram.size(), raft::CompareApprox<math_t>(1e-6f)));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
   GramMatrixInputs params;
 
diff --git a/cpp/test/sparse/mst.cu b/cpp/test/sparse/mst.cu
index 0a80846440..de0694ca10 100644
--- a/cpp/test/sparse/mst.cu
+++ b/cpp/test/sparse/mst.cu
@@ -15,6 +15,7 @@
  */
 
 #include <bits/stdc++.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
@@ -22,7 +23,7 @@
 #include <rmm/device_uvector.hpp>
 #include <vector>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/mst/mst.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -137,30 +138,50 @@ class MSTTest : public ::testing::TestWithParam<MSTTestInput<vertex_t, edge_t, w
     v = static_cast<vertex_t>((csr_d.offsets.size() / sizeof(vertex_t)) - 1);
     e = static_cast<edge_t>(csr_d.indices.size() / sizeof(edge_t));
 
-    rmm::device_uvector<vertex_t> mst_src(2 * v - 2, handle.get_stream());
-    rmm::device_uvector<vertex_t> mst_dst(2 * v - 2, handle.get_stream());
-    rmm::device_uvector<vertex_t> color(v, handle.get_stream());
+    rmm::device_uvector<vertex_t> mst_src(2 * v - 2, resource::get_cuda_stream(handle));
+    rmm::device_uvector<vertex_t> mst_dst(2 * v - 2, resource::get_cuda_stream(handle));
+    rmm::device_uvector<vertex_t> color(v, resource::get_cuda_stream(handle));
 
     RAFT_CUDA_TRY(cudaMemsetAsync(mst_src.data(),
                                   std::numeric_limits<vertex_t>::max(),
                                   mst_src.size() * sizeof(vertex_t),
-                                  handle.get_stream()));
+                                  resource::get_cuda_stream(handle)));
     RAFT_CUDA_TRY(cudaMemsetAsync(mst_dst.data(),
                                   std::numeric_limits<vertex_t>::max(),
                                   mst_dst.size() * sizeof(vertex_t),
-                                  handle.get_stream()));
-    RAFT_CUDA_TRY(
-      cudaMemsetAsync(color.data(), 0, color.size() * sizeof(vertex_t), handle.get_stream()));
+                                  resource::get_cuda_stream(handle)));
+    RAFT_CUDA_TRY(cudaMemsetAsync(
+      color.data(), 0, color.size() * sizeof(vertex_t), resource::get_cuda_stream(handle)));
 
     vertex_t* color_ptr = thrust::raw_pointer_cast(color.data());
 
     if (iterations == 0) {
       MST_solver<vertex_t, edge_t, weight_t, float> symmetric_solver(
-        handle, offsets, indices, weights, v, e, color_ptr, handle.get_stream(), true, true, 0);
+        handle,
+        offsets,
+        indices,
+        weights,
+        v,
+        e,
+        color_ptr,
+        resource::get_cuda_stream(handle),
+        true,
+        true,
+        0);
       auto symmetric_result = symmetric_solver.solve();
 
       MST_solver<vertex_t, edge_t, weight_t, float> non_symmetric_solver(
-        handle, offsets, indices, weights, v, e, color_ptr, handle.get_stream(), false, true, 0);
+        handle,
+        offsets,
+        indices,
+        weights,
+        v,
+        e,
+        color_ptr,
+        resource::get_cuda_stream(handle),
+        false,
+        true,
+        0);
       auto non_symmetric_result = non_symmetric_solver.solve();
 
       EXPECT_LE(symmetric_result.n_edges, 2 * v - 2);
@@ -168,45 +189,66 @@ class MSTTest : public ::testing::TestWithParam<MSTTestInput<vertex_t, edge_t, w
 
       return std::make_pair(std::move(symmetric_result), std::move(non_symmetric_result));
     } else {
-      MST_solver<vertex_t, edge_t, weight_t, float> intermediate_solver(handle,
-                                                                        offsets,
-                                                                        indices,
-                                                                        weights,
-                                                                        v,
-                                                                        e,
-                                                                        color_ptr,
-                                                                        handle.get_stream(),
-                                                                        true,
-                                                                        true,
-                                                                        iterations);
+      MST_solver<vertex_t, edge_t, weight_t, float> intermediate_solver(
+        handle,
+        offsets,
+        indices,
+        weights,
+        v,
+        e,
+        color_ptr,
+        resource::get_cuda_stream(handle),
+        true,
+        true,
+        iterations);
       auto intermediate_result = intermediate_solver.solve();
 
       MST_solver<vertex_t, edge_t, weight_t, float> symmetric_solver(
-        handle, offsets, indices, weights, v, e, color_ptr, handle.get_stream(), true, false, 0);
+        handle,
+        offsets,
+        indices,
+        weights,
+        v,
+        e,
+        color_ptr,
+        resource::get_cuda_stream(handle),
+        true,
+        false,
+        0);
       auto symmetric_result = symmetric_solver.solve();
 
       // symmetric_result.n_edges += intermediate_result.n_edges;
       auto total_edge_size = symmetric_result.n_edges + intermediate_result.n_edges;
-      symmetric_result.src.resize(total_edge_size, handle.get_stream());
-      symmetric_result.dst.resize(total_edge_size, handle.get_stream());
-      symmetric_result.weights.resize(total_edge_size, handle.get_stream());
+      symmetric_result.src.resize(total_edge_size, resource::get_cuda_stream(handle));
+      symmetric_result.dst.resize(total_edge_size, resource::get_cuda_stream(handle));
+      symmetric_result.weights.resize(total_edge_size, resource::get_cuda_stream(handle));
 
       raft::copy(symmetric_result.src.data() + symmetric_result.n_edges,
                  intermediate_result.src.data(),
                  intermediate_result.n_edges,
-                 handle.get_stream());
+                 resource::get_cuda_stream(handle));
       raft::copy(symmetric_result.dst.data() + symmetric_result.n_edges,
                  intermediate_result.dst.data(),
                  intermediate_result.n_edges,
-                 handle.get_stream());
+                 resource::get_cuda_stream(handle));
       raft::copy(symmetric_result.weights.data() + symmetric_result.n_edges,
                  intermediate_result.weights.data(),
                  intermediate_result.n_edges,
-                 handle.get_stream());
+                 resource::get_cuda_stream(handle));
       symmetric_result.n_edges = total_edge_size;
 
       MST_solver<vertex_t, edge_t, weight_t, float> non_symmetric_solver(
-        handle, offsets, indices, weights, v, e, color_ptr, handle.get_stream(), false, true, 0);
+        handle,
+        offsets,
+        indices,
+        weights,
+        v,
+        e,
+        color_ptr,
+        resource::get_cuda_stream(handle),
+        false,
+        true,
+        0);
       auto non_symmetric_result = non_symmetric_solver.solve();
 
       EXPECT_LE(symmetric_result.n_edges, 2 * v - 2);
@@ -223,13 +265,13 @@ class MSTTest : public ::testing::TestWithParam<MSTTestInput<vertex_t, edge_t, w
 
     csr_d.offsets = rmm::device_buffer(mst_input.csr_h.offsets.data(),
                                        mst_input.csr_h.offsets.size() * sizeof(edge_t),
-                                       handle.get_stream());
+                                       resource::get_cuda_stream(handle));
     csr_d.indices = rmm::device_buffer(mst_input.csr_h.indices.data(),
                                        mst_input.csr_h.indices.size() * sizeof(vertex_t),
-                                       handle.get_stream());
+                                       resource::get_cuda_stream(handle));
     csr_d.weights = rmm::device_buffer(mst_input.csr_h.weights.data(),
                                        mst_input.csr_h.weights.size() * sizeof(weight_t),
-                                       handle.get_stream());
+                                       resource::get_cuda_stream(handle));
   }
 
   void TearDown() override {}
@@ -241,7 +283,7 @@ class MSTTest : public ::testing::TestWithParam<MSTTestInput<vertex_t, edge_t, w
   edge_t e;
   int iterations;
 
-  raft::device_resources handle;
+  raft::resources handle;
 };
 
 // connected components tests
diff --git a/cpp/test/sparse/neighbors/brute_force.cu b/cpp/test/sparse/neighbors/brute_force.cu
index 49284a498b..ed5b92afc2 100644
--- a/cpp/test/sparse/neighbors/brute_force.cu
+++ b/cpp/test/sparse/neighbors/brute_force.cu
@@ -16,6 +16,7 @@
 
 #include <cusparse_v2.h>
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include "../../test_utils.cuh"
 #include <raft/distance/distance_types.hpp>
@@ -60,13 +61,13 @@ class SparseKNNTest : public ::testing::TestWithParam<SparseKNNInputs<value_idx,
  public:
   SparseKNNTest()
     : params(::testing::TestWithParam<SparseKNNInputs<value_idx, value_t>>::GetParam()),
-      indptr(0, handle.get_stream()),
-      indices(0, handle.get_stream()),
-      data(0, handle.get_stream()),
-      out_indices(0, handle.get_stream()),
-      out_dists(0, handle.get_stream()),
-      out_indices_ref(0, handle.get_stream()),
-      out_dists_ref(0, handle.get_stream())
+      indptr(0, resource::get_cuda_stream(handle)),
+      indices(0, resource::get_cuda_stream(handle)),
+      data(0, resource::get_cuda_stream(handle)),
+      out_indices(0, resource::get_cuda_stream(handle)),
+      out_dists(0, resource::get_cuda_stream(handle)),
+      out_indices_ref(0, resource::get_cuda_stream(handle)),
+      out_dists_ref(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -99,7 +100,7 @@ class SparseKNNTest : public ::testing::TestWithParam<SparseKNNInputs<value_idx,
                                                                  params.batch_size_query,
                                                                  params.metric);
 
-    RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+    RAFT_CUDA_TRY(cudaStreamSynchronize(resource::get_cuda_stream(handle)));
   }
 
   void compare()
@@ -117,7 +118,7 @@ class SparseKNNTest : public ::testing::TestWithParam<SparseKNNInputs<value_idx,
     std::vector<value_idx> indices_h = params.indices_h;
     std::vector<value_t> data_h      = params.data_h;
 
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     indptr.resize(indptr_h.size(), stream);
     indices.resize(indices_h.size(), stream);
     data.resize(data_h.size(), stream);
@@ -140,7 +141,7 @@ class SparseKNNTest : public ::testing::TestWithParam<SparseKNNInputs<value_idx,
     out_indices.resize(n_rows * k, stream);
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
 
   int n_rows, nnz, k;
 
diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu
index e14cd9a180..373963b653 100644
--- a/cpp/test/sparse/neighbors/connect_components.cu
+++ b/cpp/test/sparse/neighbors/connect_components.cu
@@ -24,6 +24,7 @@
 #undef RAFT_EXPLICIT_INSTANTIATE_ONLY
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <cub/cub.cuh>
 
@@ -65,17 +66,18 @@ class ConnectComponentsTest
  protected:
   void basicTest()
   {
-    raft::device_resources handle;
+    raft::resources handle;
 
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
 
     params = ::testing::TestWithParam<ConnectComponentsInputs<value_t, value_idx>>::GetParam();
 
-    raft::sparse::COO<value_t, value_idx> out_edges(handle.get_stream());
+    raft::sparse::COO<value_t, value_idx> out_edges(resource::get_cuda_stream(handle));
 
-    rmm::device_uvector<value_t> data(params.n_row * params.n_col, handle.get_stream());
+    rmm::device_uvector<value_t> data(params.n_row * params.n_col,
+                                      resource::get_cuda_stream(handle));
 
-    raft::copy(data.data(), params.data.data(), data.size(), handle.get_stream());
+    raft::copy(data.data(), params.data.data(), data.size(), resource::get_cuda_stream(handle));
 
     rmm::device_uvector<value_idx> indptr(params.n_row + 1, stream);
 
@@ -137,7 +139,7 @@ class ConnectComponentsTest
                                                                     false,
                                                                     false);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     // The sum of edges for both MST runs should be n_rows - 1
     final_edges = output_mst.n_edges + mst_coo.n_edges;
diff --git a/cpp/test/sparse/neighbors/knn_graph.cu b/cpp/test/sparse/neighbors/knn_graph.cu
index aadb00879b..bb190b7bc1 100644
--- a/cpp/test/sparse/neighbors/knn_graph.cu
+++ b/cpp/test/sparse/neighbors/knn_graph.cu
@@ -16,6 +16,7 @@
 
 #include "../../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
@@ -61,7 +62,7 @@ class KNNGraphTest : public ::testing::TestWithParam<KNNGraphInputs<value_idx, v
  public:
   KNNGraphTest()
     : params(::testing::TestWithParam<KNNGraphInputs<value_idx, value_t>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       X(0, stream)
   {
     X.resize(params.X.size(), stream);
@@ -87,13 +88,13 @@ class KNNGraphTest : public ::testing::TestWithParam<KNNGraphInputs<value_idx, v
       out->rows(), out->cols(), out->vals(), out->nnz, sum.data());
 
     sum_h = sum.value(stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void TearDown() override { delete out; }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   // input data
diff --git a/cpp/test/sparse/norm.cu b/cpp/test/sparse/norm.cu
index 65d857652c..10003b1fb6 100644
--- a/cpp/test/sparse/norm.cu
+++ b/cpp/test/sparse/norm.cu
@@ -15,10 +15,11 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include "../test_utils.cuh"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/norm_types.hpp>
 #include <raft/sparse/linalg/norm.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -42,7 +43,7 @@ class CSRRowNormTest : public ::testing::TestWithParam<CSRRowNormInputs<Type_f,
  public:
   CSRRowNormTest()
     : params(::testing::TestWithParam<CSRRowNormInputs<Type_f, Index_>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.data.size(), stream),
       verify(params.indptr.size() - 1, stream),
       indptr(params.indptr.size(), stream),
@@ -70,7 +71,7 @@ class CSRRowNormTest : public ::testing::TestWithParam<CSRRowNormInputs<Type_f,
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   CSRRowNormInputs<Type_f, Index_> params;
diff --git a/cpp/test/sparse/normalize.cu b/cpp/test/sparse/normalize.cu
index 91b7b09fcc..77ddda225a 100644
--- a/cpp/test/sparse/normalize.cu
+++ b/cpp/test/sparse/normalize.cu
@@ -15,10 +15,11 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include "../test_utils.cuh"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/csr.hpp>
 #include <raft/sparse/linalg/norm.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -44,7 +45,7 @@ class CSRRowNormalizeTest : public ::testing::TestWithParam<CSRRowNormalizeInput
  public:
   CSRRowNormalizeTest()
     : params(::testing::TestWithParam<CSRRowNormalizeInputs<Type_f, Index_>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       in_vals(params.in_vals.size(), stream),
       verify(params.verify.size(), stream),
       ex_scan(params.ex_scan.size(), stream),
@@ -81,7 +82,7 @@ class CSRRowNormalizeTest : public ::testing::TestWithParam<CSRRowNormalizeInput
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   CSRRowNormalizeInputs<Type_f, Index_> params;
diff --git a/cpp/test/sparse/reduce.cu b/cpp/test/sparse/reduce.cu
index 6dc67dbbd8..7713d96821 100644
--- a/cpp/test/sparse/reduce.cu
+++ b/cpp/test/sparse/reduce.cu
@@ -15,11 +15,12 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include "../test_utils.cuh"
 #include <iostream>
 #include <limits>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/op/reduce.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -51,9 +52,9 @@ class SparseReduceTest : public ::testing::TestWithParam<SparseReduceInputs<valu
 
   void Run()
   {
-    raft::device_resources handle;
+    raft::resources handle;
 
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
 
     rmm::device_uvector<value_idx> in_rows(params.in_rows.size(), stream);
     rmm::device_uvector<value_idx> in_cols(params.in_cols.size(), stream);
diff --git a/cpp/test/sparse/row_op.cu b/cpp/test/sparse/row_op.cu
index e09af0d9ff..4d15a26b9d 100644
--- a/cpp/test/sparse/row_op.cu
+++ b/cpp/test/sparse/row_op.cu
@@ -15,6 +15,7 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/sparse/csr.hpp>
 #include <raft/sparse/op/row_op.cuh>
@@ -56,7 +57,7 @@ class CSRRowOpTest : public ::testing::TestWithParam<CSRRowOpInputs<Type_f, Inde
  public:
   CSRRowOpTest()
     : params(::testing::TestWithParam<CSRRowOpInputs<Type_f, Index_>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       verify(params.verify.size(), stream),
       ex_scan(params.ex_scan.size(), stream),
       result(params.verify.size(), stream)
@@ -82,7 +83,7 @@ class CSRRowOpTest : public ::testing::TestWithParam<CSRRowOpInputs<Type_f, Inde
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   CSRRowOpInputs<Type_f, Index_> params;
diff --git a/cpp/test/sparse/sort.cu b/cpp/test/sparse/sort.cu
index 319c96bc02..2e26225e78 100644
--- a/cpp/test/sparse/sort.cu
+++ b/cpp/test/sparse/sort.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -51,8 +52,8 @@ TEST_P(COOSort, Result)
 {
   params = ::testing::TestWithParam<SparseSortInput<float>>::GetParam();
   raft::random::RngState r(params.seed);
-  raft::device_resources h;
-  auto stream = h.get_stream();
+  raft::resources h;
+  auto stream = resource::get_cuda_stream(h);
 
   rmm::device_uvector<int> in_rows(params.nnz, stream);
   rmm::device_uvector<int> in_cols(params.nnz, stream);
diff --git a/cpp/test/sparse/spectral_matrix.cu b/cpp/test/sparse/spectral_matrix.cu
index 3b044e3974..0bed73a722 100644
--- a/cpp/test/sparse/spectral_matrix.cu
+++ b/cpp/test/sparse/spectral_matrix.cu
@@ -17,7 +17,9 @@
 #include <gtest/gtest.h>
 #include <iostream>
 #include <memory>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_id.hpp>
+#include <raft/core/resources.hpp>
 
 #include <raft/spectral/matrix_wrappers.hpp>
 
@@ -39,8 +41,8 @@ TEST(Raft, SpectralMatrices)
   using index_type = int;
   using value_type = double;
 
-  raft::device_resources h;
-  ASSERT_EQ(0, h.get_device());
+  raft::resources h;
+  ASSERT_EQ(0, raft::resource::get_device_id(h));
 
   csr_view_t<index_type, value_type> csr_v{nullptr, nullptr, nullptr, 0, 0};
 
@@ -57,7 +59,7 @@ TEST(Raft, SpectralMatrices)
   ASSERT_EQ(nullptr, sm1.row_offsets_);
   ASSERT_EQ(nullptr, sm2.row_offsets_);
 
-  auto stream = h.get_stream();
+  auto stream = resource::get_cuda_stream(h);
 
   auto cnstr_lm1 = [&h, ro, ci, vs, nrows, nnz](void) {
     laplacian_matrix_t<index_type, value_type> lm1{h, ro, ci, vs, nrows, nnz};
diff --git a/cpp/test/sparse/spgemmi.cu b/cpp/test/sparse/spgemmi.cu
index e0aa4bc43b..71c18dd9ac 100644
--- a/cpp/test/sparse/spgemmi.cu
+++ b/cpp/test/sparse/spgemmi.cu
@@ -15,10 +15,12 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cusparse_handle.hpp>
 
 #include "../test_utils.cuh"
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/transpose.cuh>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 #include <raft/util/cudart_utils.hpp>
@@ -38,7 +40,8 @@ template <typename data_t>
 class SPGemmiTest : public ::testing::TestWithParam<SPGemmiInputs> {
  public:
   SPGemmiTest()
-    : params(::testing::TestWithParam<SPGemmiInputs>::GetParam()), stream(handle.get_stream())
+    : params(::testing::TestWithParam<SPGemmiInputs>::GetParam()),
+      stream(resource::get_cuda_stream(handle))
   {
   }
 
@@ -97,7 +100,7 @@ class SPGemmiTest : public ::testing::TestWithParam<SPGemmiInputs> {
 
     //--------------------------------------------------------------------------
     // execute gemmi
-    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsegemmi(handle.get_cusparse_handle(),
+    RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsegemmi(resource::get_cusparse_handle(handle),
                                                           A_num_rows,
                                                           B_num_cols,
                                                           A_num_cols,
@@ -111,7 +114,7 @@ class SPGemmiTest : public ::testing::TestWithParam<SPGemmiInputs> {
                                                           &beta,
                                                           dC.data(),
                                                           ldc,
-                                                          handle.get_stream()));
+                                                          resource::get_cuda_stream(handle)));
 
     //--------------------------------------------------------------------------
     // result check
@@ -120,7 +123,7 @@ class SPGemmiTest : public ::testing::TestWithParam<SPGemmiInputs> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   SPGemmiInputs params;
diff --git a/cpp/test/sparse/symmetrize.cu b/cpp/test/sparse/symmetrize.cu
index 80a512a019..29549d057d 100644
--- a/cpp/test/sparse/symmetrize.cu
+++ b/cpp/test/sparse/symmetrize.cu
@@ -15,6 +15,7 @@
  */
 
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/sparse/convert/coo.cuh>
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/linalg/symmetrize.cuh>
@@ -63,7 +64,7 @@ class SparseSymmetrizeTest
  public:
   SparseSymmetrizeTest()
     : params(::testing::TestWithParam<SparseSymmetrizeInputs<value_idx, value_t>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       indptr(0, stream),
       indices(0, stream),
       data(0, stream)
@@ -110,11 +111,11 @@ class SparseSymmetrizeTest
       out.rows(), out.cols(), out.vals(), out.nnz, sum.data());
 
     sum_h = sum.value(stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   // input data
diff --git a/cpp/test/stats/accuracy.cu b/cpp/test/stats/accuracy.cu
index fbb47e1ca4..5bc0506e7f 100644
--- a/cpp/test/stats/accuracy.cu
+++ b/cpp/test/stats/accuracy.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <optional>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/accuracy.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -45,7 +46,7 @@ template <typename T>
 template <typename T>
 class AccuracyTest : public ::testing::TestWithParam<AccuracyInputs<T>> {
  protected:
-  AccuracyTest() : stream(handle.get_stream()) {}
+  AccuracyTest() : stream(resource::get_cuda_stream(handle)) {}
 
   void SetUp() override
   {
@@ -76,7 +77,7 @@ class AccuracyTest : public ::testing::TestWithParam<AccuracyInputs<T>> {
 
  protected:
   AccuracyInputs<T> params;
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
   T expectedVal, actualVal;
 };
diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu
index 4506a6730a..fb7b3825fc 100644
--- a/cpp/test/stats/adjusted_rand_index.cu
+++ b/cpp/test/stats/adjusted_rand_index.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <iostream>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/adjusted_rand_index.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <random>
@@ -41,7 +42,9 @@ template <typename T, typename MathT = int>
 class adjustedRandIndexTest : public ::testing::TestWithParam<adjustedRandIndexParam> {
  protected:
   adjustedRandIndexTest()
-    : stream(handle.get_stream()), firstClusterArray(0, stream), secondClusterArray(0, stream)
+    : stream(resource::get_cuda_stream(handle)),
+      firstClusterArray(0, stream),
+      secondClusterArray(0, stream)
   {
   }
 
@@ -137,7 +140,7 @@ class adjustedRandIndexTest : public ::testing::TestWithParam<adjustedRandIndexP
     truth_adjusted_rand_index = 1.0;
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
   adjustedRandIndexParam params;
   T lowerLabelRange, upperLabelRange;
diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu
index a2a926d41d..c5c134418c 100644
--- a/cpp/test/stats/completeness_score.cu
+++ b/cpp/test/stats/completeness_score.cu
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/completeness_score.cuh>
 #include <raft/stats/entropy.cuh>
 #include <raft/stats/mutual_info_score.cuh>
@@ -40,7 +41,7 @@ template <typename T>
 class completenessTest : public ::testing::TestWithParam<completenessParam> {
  protected:
   // the constructor
-  completenessTest() : stream(handle.get_stream()) {}
+  completenessTest() : stream(resource::get_cuda_stream(handle)) {}
 
   void SetUp() override
   {
@@ -100,7 +101,7 @@ class completenessTest : public ::testing::TestWithParam<completenessParam> {
   }
 
   // declaring the data values
-  raft::device_resources handle;
+  raft::resources handle;
   completenessParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements               = 0;
diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu
index 3eb823377a..acfd1aecfe 100644
--- a/cpp/test/stats/contingencyMatrix.cu
+++ b/cpp/test/stats/contingencyMatrix.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <iostream>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/contingency_matrix.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <random>
@@ -40,7 +41,7 @@ template <typename T>
 class ContingencyMatrixTest : public ::testing::TestWithParam<ContingencyMatrixParam> {
  protected:
   ContingencyMatrixTest()
-    : stream(handle.get_stream()),
+    : stream(resource::get_cuda_stream(handle)),
       dY(0, stream),
       dYHat(0, stream),
       dComputedOutput(0, stream),
@@ -135,7 +136,7 @@ class ContingencyMatrixTest : public ::testing::TestWithParam<ContingencyMatrixP
                                   raft::Compare<T>()));
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   ContingencyMatrixParam params;
   int numUniqueClasses = -1;
   T minLabel, maxLabel;
diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu
index c8a90b2f7d..ca9d437717 100644
--- a/cpp/test/stats/cov.cu
+++ b/cpp/test/stats/cov.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/cov.cuh>
 #include <raft/stats/mean.cuh>
@@ -53,8 +54,8 @@ class CovTest : public ::testing::TestWithParam<CovInputs<T>> {
 
   void SetUp() override
   {
-    raft::device_resources handle;
-    cudaStream_t stream = handle.get_stream();
+    raft::resources handle;
+    cudaStream_t stream = resource::get_cuda_stream(handle);
 
     params = ::testing::TestWithParam<CovInputs<T>>::GetParam();
     params.tolerance *= 2;
diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu
index 32b4d79da6..9ef678050d 100644
--- a/cpp/test/stats/dispersion.cu
+++ b/cpp/test/stats/dispersion.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <optional>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/dispersion.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -45,7 +46,10 @@ template <typename T>
 template <typename T>
 class DispersionTest : public ::testing::TestWithParam<DispersionInputs<T>> {
  protected:
-  DispersionTest() : stream(handle.get_stream()), exp_mean(0, stream), act_mean(0, stream) {}
+  DispersionTest()
+    : stream(resource::get_cuda_stream(handle)), exp_mean(0, stream), act_mean(0, stream)
+  {
+  }
 
   void SetUp() override
   {
@@ -95,7 +99,7 @@ class DispersionTest : public ::testing::TestWithParam<DispersionInputs<T>> {
 
  protected:
   DispersionInputs<T> params;
-  raft::device_resources handle;
+  raft::resources handle;
   rmm::device_uvector<T> exp_mean, act_mean;
   cudaStream_t stream = 0;
   int npoints;
diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu
index 334f5aad1d..dea8828b26 100644
--- a/cpp/test/stats/entropy.cu
+++ b/cpp/test/stats/entropy.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <iostream>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/entropy.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <random>
@@ -38,7 +39,7 @@ template <typename T>
 class entropyTest : public ::testing::TestWithParam<entropyParam> {
  protected:
   // the constructor
-  entropyTest() : stream(handle.get_stream()) {}
+  entropyTest() : stream(resource::get_cuda_stream(handle)) {}
 
   void SetUp() override
   {
@@ -88,7 +89,7 @@ class entropyTest : public ::testing::TestWithParam<entropyParam> {
                            upperLabelRange);
   }
 
-  raft::device_resources handle;
+  raft::resources handle;
   // declaring the data values
   entropyParam params;
   T lowerLabelRange, upperLabelRange;
diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu
index c6c3dd48ca..027434aa31 100644
--- a/cpp/test/stats/histogram.cu
+++ b/cpp/test/stats/histogram.cu
@@ -16,8 +16,9 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
-#include <raft/core/device_resources.hpp>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/histogram.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -64,7 +65,9 @@ struct HistInputs {
 class HistTest : public ::testing::TestWithParam<HistInputs> {
  protected:
   HistTest()
-    : in(0, handle.get_stream()), bins(0, handle.get_stream()), ref_bins(0, handle.get_stream())
+    : in(0, resource::get_cuda_stream(handle)),
+      bins(0, resource::get_cuda_stream(handle)),
+      ref_bins(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -72,7 +75,7 @@ class HistTest : public ::testing::TestWithParam<HistInputs> {
   {
     params = ::testing::TestWithParam<HistInputs>::GetParam();
     raft::random::RngState r(params.seed);
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     int len     = params.nrows * params.ncols;
     in.resize(len, stream);
     if (params.isNormal) {
@@ -91,11 +94,11 @@ class HistTest : public ::testing::TestWithParam<HistInputs> {
                 in.data(), params.nrows, params.ncols),
               raft::make_device_matrix_view<int, int, raft::col_major>(
                 bins.data(), params.nbins, params.ncols));
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   HistInputs params;
   rmm::device_uvector<int> in, bins, ref_bins;
 };
@@ -103,7 +106,9 @@ class HistTest : public ::testing::TestWithParam<HistInputs> {
 class HistMdspanTest : public ::testing::TestWithParam<HistInputs> {
  protected:
   HistMdspanTest()
-    : in(0, handle.get_stream()), bins(0, handle.get_stream()), ref_bins(0, handle.get_stream())
+    : in(0, resource::get_cuda_stream(handle)),
+      bins(0, resource::get_cuda_stream(handle)),
+      ref_bins(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -111,7 +116,7 @@ class HistMdspanTest : public ::testing::TestWithParam<HistInputs> {
   {
     params = ::testing::TestWithParam<HistInputs>::GetParam();
     raft::random::RngState r(params.seed);
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     int len     = params.nrows * params.ncols;
     in.resize(len, stream);
 
@@ -128,11 +133,11 @@ class HistMdspanTest : public ::testing::TestWithParam<HistInputs> {
     naiveHist(ref_bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream);
     histogram<int>(
       params.type, bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream);
-    handle.sync_stream();
+    resource::sync_stream(handle);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   HistInputs params;
   rmm::device_uvector<int> in, bins, ref_bins;
 };
diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu
index 1b48bb1823..88247f5b50 100644
--- a/cpp/test/stats/homogeneity_score.cu
+++ b/cpp/test/stats/homogeneity_score.cu
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/homogeneity_score.cuh>
 #include <raft/stats/mutual_info_score.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -47,7 +48,7 @@ class homogeneityTest : public ::testing::TestWithParam<homogeneityParam> {
     nElements       = params.nElements;
     lowerLabelRange = params.lowerLabelRange;
     upperLabelRange = params.upperLabelRange;
-    stream          = handle.get_stream();
+    stream          = resource::get_cuda_stream(handle);
 
     // generating random value test input
     std::vector<int> arr1(nElements, 0);
@@ -98,7 +99,7 @@ class homogeneityTest : public ::testing::TestWithParam<homogeneityParam> {
   }
 
   // declaring the data values
-  raft::device_resources handle;
+  raft::resources handle;
   homogeneityParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements              = 0;
diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu
index 45804c6724..9e57f2c84f 100644
--- a/cpp/test/stats/information_criterion.cu
+++ b/cpp/test/stats/information_criterion.cu
@@ -15,10 +15,11 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/stats/information_criterion.cuh>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -63,7 +64,7 @@ class BatchedICTest : public ::testing::TestWithParam<BatchedICInputs<T>> {
  public:
   BatchedICTest()
     : params(::testing::TestWithParam<BatchedICInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       res_d(sizeof(T) * params.batch_size, stream)
   {
   }
@@ -109,7 +110,7 @@ class BatchedICTest : public ::testing::TestWithParam<BatchedICInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
   BatchedICInputs<T> params;
   rmm::device_uvector<T> res_d;
diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu
index 15eac6428a..5714583675 100644
--- a/cpp/test/stats/kl_divergence.cu
+++ b/cpp/test/stats/kl_divergence.cu
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/kl_divergence.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <random>
@@ -39,7 +40,7 @@ class klDivergenceTest : public ::testing::TestWithParam<klDivergenceParam> {
   {
     // getting the parameters
     params = ::testing::TestWithParam<klDivergenceParam>::GetParam();
-    stream = handle.get_stream();
+    stream = resource::get_cuda_stream(handle);
 
     nElements = params.nElements;
 
@@ -80,7 +81,7 @@ class klDivergenceTest : public ::testing::TestWithParam<klDivergenceParam> {
   }
 
   // declaring the data values
-  raft::device_resources handle;
+  raft::resources handle;
   klDivergenceParam params;
   int nElements              = 0;
   DataT truthklDivergence    = 0;
diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu
index 4d011a2425..0cb90b6d46 100644
--- a/cpp/test/stats/mean.cu
+++ b/cpp/test/stats/mean.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/mean.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -45,7 +46,7 @@ class MeanTest : public ::testing::TestWithParam<MeanInputs<T>> {
  public:
   MeanTest()
     : params(::testing::TestWithParam<MeanInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       rows(params.rows),
       cols(params.cols),
       data(rows * cols, stream),
@@ -81,7 +82,7 @@ class MeanTest : public ::testing::TestWithParam<MeanInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MeanInputs<T> params;
diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu
index e5e01a2b10..4d797e60fe 100644
--- a/cpp/test/stats/mean_center.cu
+++ b/cpp/test/stats/mean_center.cu
@@ -17,6 +17,7 @@
 #include "../linalg/matrix_vector_op.cuh"
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/mean.cuh>
 #include <raft/stats/mean_center.cuh>
@@ -44,7 +45,7 @@ class MeanCenterTest : public ::testing::TestWithParam<MeanCenterInputs<T, IdxTy
  public:
   MeanCenterTest()
     : params(::testing::TestWithParam<MeanCenterInputs<T, IdxType>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       rows(params.rows),
       cols(params.cols),
       out(rows * cols, stream),
@@ -87,11 +88,11 @@ class MeanCenterTest : public ::testing::TestWithParam<MeanCenterInputs<T, IdxTy
                               params.bcastAlongRows,
                               (T)-1.0,
                               stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MeanCenterInputs<T, IdxType> params;
diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu
index d21ec43bba..df3d9d9c00 100644
--- a/cpp/test/stats/meanvar.cu
+++ b/cpp/test/stats/meanvar.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/math.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/stats/meanvar.cuh>
@@ -55,7 +56,7 @@ class MeanVarTest : public ::testing::TestWithParam<MeanVarInputs<T>> {
  public:
   MeanVarTest()
     : params(::testing::TestWithParam<MeanVarInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       data(params.rows * params.cols, stream),
       mean_act(params.cols, stream),
       vars_act(params.cols, stream)
@@ -89,7 +90,7 @@ class MeanVarTest : public ::testing::TestWithParam<MeanVarInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   MeanVarInputs<T> params;
diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu
index e0dc77520d..6a7ab9e917 100644
--- a/cpp/test/stats/minmax.cu
+++ b/cpp/test/stats/minmax.cu
@@ -18,7 +18,8 @@
 #include <gtest/gtest.h>
 #include <limits>
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/minmax.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -92,11 +93,15 @@ __global__ void nanKernel(T* data, const bool* mask, int len, T nan)
 template <typename T>
 class MinMaxTest : public ::testing::TestWithParam<MinMaxInputs<T>> {
  protected:
-  MinMaxTest() : minmax_act(0, handle.get_stream()), minmax_ref(0, handle.get_stream()) {}
+  MinMaxTest()
+    : minmax_act(0, resource::get_cuda_stream(handle)),
+      minmax_ref(0, resource::get_cuda_stream(handle))
+  {
+  }
 
   void SetUp() override
   {
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     params      = ::testing::TestWithParam<MinMaxInputs<T>>::GetParam();
     raft::random::RngState r(params.seed);
     int len = params.rows * params.cols;
@@ -131,7 +136,7 @@ class MinMaxTest : public ::testing::TestWithParam<MinMaxInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   MinMaxInputs<T> params;
   rmm::device_uvector<T> minmax_act;
   rmm::device_uvector<T> minmax_ref;
diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu
index 1b4ce26746..9f31350844 100644
--- a/cpp/test/stats/mutual_info_score.cu
+++ b/cpp/test/stats/mutual_info_score.cu
@@ -17,7 +17,8 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/mutual_info_score.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <random>
@@ -104,7 +105,7 @@ class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam> {
     truthmutualInfo /= nElements;
 
     // allocating and initializing memory to the GPU
-    stream = handle.get_stream();
+    stream = resource::get_cuda_stream(handle);
 
     rmm::device_uvector<T> firstClusterArray(nElements, stream);
     rmm::device_uvector<T> secondClusterArray(nElements, stream);
@@ -126,7 +127,7 @@ class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam> {
   }
 
   // declaring the data values
-  raft::device_resources handle;
+  raft::resources handle;
   mutualInfoParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements             = 0;
diff --git a/cpp/test/stats/r2_score.cu b/cpp/test/stats/r2_score.cu
index 39ca623e11..aa4f069f09 100644
--- a/cpp/test/stats/r2_score.cu
+++ b/cpp/test/stats/r2_score.cu
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include <optional>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/r2_score.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -45,7 +46,7 @@ template <typename T>
 template <typename T>
 class R2_scoreTest : public ::testing::TestWithParam<R2_scoreInputs<T>> {
  protected:
-  R2_scoreTest() : stream(handle.get_stream()) {}
+  R2_scoreTest() : stream(resource::get_cuda_stream(handle)) {}
 
   void SetUp() override
   {
@@ -84,7 +85,7 @@ class R2_scoreTest : public ::testing::TestWithParam<R2_scoreInputs<T>> {
 
  protected:
   R2_scoreInputs<T> params;
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream = 0;
   T expectedVal, actualVal;
 };
diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu
index 10a31a27ca..41b0823e76 100644
--- a/cpp/test/stats/rand_index.cu
+++ b/cpp/test/stats/rand_index.cu
@@ -15,6 +15,7 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
 #include <raft/util/cudart_utils.hpp>
 
@@ -22,7 +23,7 @@
 
 #include <algorithm>
 #include <iostream>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/stats/rand_index.cuh>
 #include <random>
 
@@ -78,7 +79,7 @@ class randIndexTest : public ::testing::TestWithParam<randIndexParam> {
     truthRandIndex      = (double)(((double)(a_truth + b_truth)) / (double)nChooseTwo);
 
     // allocating and initializing memory to the GPU
-    stream = handle.get_stream();
+    stream = resource::get_cuda_stream(handle);
 
     rmm::device_uvector<T> firstClusterArray(size, stream);
     rmm::device_uvector<T> secondClusterArray(size, stream);
@@ -98,7 +99,7 @@ class randIndexTest : public ::testing::TestWithParam<randIndexParam> {
   }
 
   // declaring the data values
-  raft::device_resources handle;
+  raft::resources handle;
   randIndexParam params;
   int lowerLabelRange = 0, upperLabelRange = 2;
   uint64_t size            = 0;
diff --git a/cpp/test/stats/regression_metrics.cu b/cpp/test/stats/regression_metrics.cu
index 48edad0f30..b0c4cca530 100644
--- a/cpp/test/stats/regression_metrics.cu
+++ b/cpp/test/stats/regression_metrics.cu
@@ -19,6 +19,7 @@
 #include <gtest/gtest.h>
 #include <optional>
 #include <raft/core/interruptible.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/regression_metrics.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -75,7 +76,7 @@ void naive_reg_metrics(std::vector<T>& predictions,
 template <typename T>
 class RegressionTest : public ::testing::TestWithParam<RegressionInputs<T>> {
  protected:
-  RegressionTest() : stream(handle.get_stream()) {}
+  RegressionTest() : stream(resource::get_cuda_stream(handle)) {}
 
   void SetUp() override
   {
@@ -106,7 +107,7 @@ class RegressionTest : public ::testing::TestWithParam<RegressionInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   RegressionInputs<T> params;
   cudaStream_t stream           = 0;
   double mean_abs_error         = 0;
diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu
index 9ad89d59c0..ed69b2f3ac 100644
--- a/cpp/test/stats/silhouette_score.cu
+++ b/cpp/test/stats/silhouette_score.cu
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/util/cudart_utils.hpp>
 
@@ -42,9 +43,9 @@ template <typename LabelT, typename DataT>
 class silhouetteScoreTest : public ::testing::TestWithParam<silhouetteScoreParam> {
  protected:
   silhouetteScoreTest()
-    : d_X(0, handle.get_stream()),
-      sampleSilScore(0, handle.get_stream()),
-      d_labels(0, handle.get_stream())
+    : d_X(0, resource::get_cuda_stream(handle)),
+      sampleSilScore(0, resource::get_cuda_stream(handle)),
+      d_labels(0, resource::get_cuda_stream(handle))
   {
   }
 
@@ -62,7 +63,7 @@ class silhouetteScoreTest : public ::testing::TestWithParam<silhouetteScoreParam
     std::generate(h_labels.begin(), h_labels.end(), [&]() { return intGenerator(dre); });
 
     // allocating and initializing memory to the GPU
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     d_X.resize(nElements, stream);
     d_labels.resize(nElements, stream);
     RAFT_CUDA_TRY(cudaMemsetAsync(d_X.data(), 0, d_X.size() * sizeof(DataT), stream));
@@ -80,7 +81,7 @@ class silhouetteScoreTest : public ::testing::TestWithParam<silhouetteScoreParam
     raft::distance::pairwise_distance(
       handle, d_X.data(), d_X.data(), d_distanceMatrix.data(), nRows, nRows, nCols, params.metric);
 
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
 
     raft::update_host(h_distanceMatrix, d_distanceMatrix.data(), nRows * nRows, stream);
 
@@ -188,7 +189,7 @@ class silhouetteScoreTest : public ::testing::TestWithParam<silhouetteScoreParam
   }
 
   // declaring the data values
-  raft::device_resources handle;
+  raft::resources handle;
   silhouetteScoreParam params;
   int nLabels;
   rmm::device_uvector<DataT> d_X;
diff --git a/cpp/test/stats/stddev.cu b/cpp/test/stats/stddev.cu
index dfc31f31d2..998f7a88e7 100644
--- a/cpp/test/stats/stddev.cu
+++ b/cpp/test/stats/stddev.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/matrix/math.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/stats/mean.cuh>
@@ -44,7 +45,7 @@ class StdDevTest : public ::testing::TestWithParam<StdDevInputs<T>> {
  public:
   StdDevTest()
     : params(::testing::TestWithParam<StdDevInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       rows(params.rows),
       cols(params.cols),
       data(rows * cols, stream),
@@ -66,7 +67,7 @@ class StdDevTest : public ::testing::TestWithParam<StdDevInputs<T>> {
     vars_act.resize(cols, stream);
     normal(handle, r, data.data(), len, params.mean, params.stddev);
     stdVarSGtest(data.data(), stream);
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
   void stdVarSGtest(T* data, cudaStream_t stream)
@@ -114,7 +115,7 @@ class StdDevTest : public ::testing::TestWithParam<StdDevInputs<T>> {
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   StdDevInputs<T> params;
diff --git a/cpp/test/stats/sum.cu b/cpp/test/stats/sum.cu
index f6b6ffcc45..040b662c42 100644
--- a/cpp/test/stats/sum.cu
+++ b/cpp/test/stats/sum.cu
@@ -15,8 +15,9 @@
  */
 
 #include "../test_utils.cuh"
+#include <raft/core/resource/cuda_stream.hpp>
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/linalg/eltwise.cuh>
 #include <raft/stats/sum.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -46,7 +47,7 @@ class SumTest : public ::testing::TestWithParam<SumInputs<T>> {
  public:
   SumTest()
     : params(::testing::TestWithParam<SumInputs<T>>::GetParam()),
-      stream(handle.get_stream()),
+      stream(resource::get_cuda_stream(handle)),
       rows(params.rows),
       cols(params.cols),
       data(rows * cols, stream),
@@ -68,11 +69,11 @@ class SumTest : public ::testing::TestWithParam<SumInputs<T>> {
     sum(handle,
         raft::make_device_matrix_view<const T>(data.data(), rows, cols),
         raft::make_device_vector_view(sum_act.data(), cols));
-    handle.sync_stream(stream);
+    resource::sync_stream(handle, stream);
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   cudaStream_t stream;
 
   SumInputs<T> params;
diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu
index 15b27c7669..502a59f5c2 100644
--- a/cpp/test/stats/trustworthiness.cu
+++ b/cpp/test/stats/trustworthiness.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/util/cudart_utils.hpp>
 
@@ -28,7 +29,10 @@ namespace stats {
 
 class TrustworthinessScoreTest : public ::testing::Test {
  public:
-  TrustworthinessScoreTest() : d_X(0, handle.get_stream()), d_X_embedded(0, handle.get_stream()) {}
+  TrustworthinessScoreTest()
+    : d_X(0, resource::get_cuda_stream(handle)), d_X_embedded(0, resource::get_cuda_stream(handle))
+  {
+  }
 
  protected:
   void basicTest()
@@ -310,7 +314,7 @@ class TrustworthinessScoreTest : public ::testing::Test {
       -0.02323332, 0.04292452,  0.39291084,  -0.94897962, -0.63863206, -0.16546988, 0.23698957,
       -0.30633628};
 
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
 
     d_X.resize(X.size(), stream);
     d_X_embedded.resize(X_embedded.size(), stream);
@@ -334,7 +338,7 @@ class TrustworthinessScoreTest : public ::testing::Test {
   void TearDown() override {}
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
 
   rmm::device_uvector<float> d_X;
   rmm::device_uvector<float> d_X_embedded;
diff --git a/cpp/test/stats/v_measure.cu b/cpp/test/stats/v_measure.cu
index 9d1522a5c8..0cc164f277 100644
--- a/cpp/test/stats/v_measure.cu
+++ b/cpp/test/stats/v_measure.cu
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/stats/homogeneity_score.cuh>
 #include <raft/stats/v_measure.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -65,7 +66,7 @@ class vMeasureTest : public ::testing::TestWithParam<vMeasureParam> {
 
     // allocating and initializing memory to the GPU
 
-    stream = handle.get_stream();
+    stream = resource::get_cuda_stream(handle);
     rmm::device_uvector<T> truthClusterArray(nElements, stream);
     rmm::device_uvector<T> predClusterArray(nElements, stream);
     raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream);
@@ -103,7 +104,7 @@ class vMeasureTest : public ::testing::TestWithParam<vMeasureParam> {
   }
 
   // declaring the data values
-  raft::device_resources handle;
+  raft::resources handle;
   vMeasureParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements           = 0;
diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu
index 7e28ca9aa3..da1a825da1 100644
--- a/cpp/test/stats/weighted_mean.cu
+++ b/cpp/test/stats/weighted_mean.cu
@@ -18,6 +18,7 @@
 #include <cstdint>
 #include <gtest/gtest.h>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/weighted_mean.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -72,7 +73,7 @@ class RowWeightedMeanTest : public ::testing::TestWithParam<WeightedMeanInputs<T
     params = ::testing::TestWithParam<WeightedMeanInputs<T>>::GetParam();
     raft::random::RngState r(params.seed);
     int rows = params.M, cols = params.N, len = rows * cols;
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     // device-side data
     din.resize(len);
     dweights.resize(cols);
@@ -112,7 +113,7 @@ class RowWeightedMeanTest : public ::testing::TestWithParam<WeightedMeanInputs<T
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   WeightedMeanInputs<T> params;
   thrust::host_vector<T> hin, hweights;
   thrust::device_vector<T> din, dweights, dexp, dact;
@@ -147,7 +148,7 @@ class ColWeightedMeanTest : public ::testing::TestWithParam<WeightedMeanInputs<T
     raft::random::RngState r(params.seed);
     int rows = params.M, cols = params.N, len = rows * cols;
 
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     // device-side data
     din.resize(len);
     dweights.resize(rows);
@@ -186,7 +187,7 @@ class ColWeightedMeanTest : public ::testing::TestWithParam<WeightedMeanInputs<T
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   WeightedMeanInputs<T> params;
   thrust::host_vector<T> hin, hweights;
   thrust::device_vector<T> din, dweights, dexp, dact;
@@ -199,7 +200,7 @@ class WeightedMeanTest : public ::testing::TestWithParam<WeightedMeanInputs<T>>
   {
     params = ::testing::TestWithParam<WeightedMeanInputs<T>>::GetParam();
     raft::random::RngState r(params.seed);
-    auto stream = handle.get_stream();
+    auto stream = resource::get_cuda_stream(handle);
     int rows = params.M, cols = params.N, len = rows * cols;
     auto weight_size = params.along_rows ? cols : rows;
     auto mean_size   = params.along_rows ? rows : cols;
@@ -244,7 +245,7 @@ class WeightedMeanTest : public ::testing::TestWithParam<WeightedMeanInputs<T>>
   }
 
  protected:
-  raft::device_resources handle;
+  raft::resources handle;
   WeightedMeanInputs<T> params;
   thrust::host_vector<T> hin, hweights;
   thrust::device_vector<T> din, dweights, dexp, dact;
diff --git a/cpp/test/util/cudart_utils.cpp b/cpp/test/util/cudart_utils.cpp
index 57cd2ff9b0..3fde494823 100644
--- a/cpp/test/util/cudart_utils.cpp
+++ b/cpp/test/util/cudart_utils.cpp
@@ -14,7 +14,8 @@
  * limitations under the License.
  */
 
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/util/cudart_utils.hpp>
 #include <vector>
 
@@ -90,11 +91,11 @@ TEST(Raft, Utils)
 
 TEST(Raft, GetDeviceForAddress)
 {
-  device_resources handle;
+  resources handle;
   std::vector<int> h(1);
   ASSERT_EQ(-1, raft::get_device_for_address(h.data()));
 
-  rmm::device_uvector<int> d(1, handle.get_stream());
+  rmm::device_uvector<int> d(1, resource::get_cuda_stream(handle));
   ASSERT_EQ(0, raft::get_device_for_address(d.data()));
 }
 
diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md
index c206808d21..7664744145 100644
--- a/docs/source/developer_guide.md
+++ b/docs/source/developer_guide.md
@@ -493,7 +493,7 @@ E.g. with a CUDA-aware MPI, a RAFT user could use code like this to inject an in
 
 ```cpp
 #include <mpi.h>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/comms/mpi_comms.hpp>
 #include <raft/algo.hpp>
 ...
@@ -519,7 +519,7 @@ int main(int argc, char * argv[])
     MPI_Comm_dup(MPI_COMM_WORLD, &raft_mpi_comms);
 
     {
-        raft::device_resources res;
+        raft::resources res;
         initialize_mpi_comms(res, raft_mpi_comms);
 
         ...
diff --git a/docs/source/using_comms.rst b/docs/source/using_comms.rst
index 84ea61c248..9ddccf8b7d 100644
--- a/docs/source/using_comms.rst
+++ b/docs/source/using_comms.rst
@@ -5,7 +5,7 @@ RAFT provides a communications abstraction for writing distributed algorithms wh
 
 While users of RAFT’s communications layer largely get MPI integration for free just by installing MPI and using `mpirun` to run their applications, the `raft-dask` Python package provides a mechanism for executing algorithms written using RAFT’s communications layer in a Dask cluster. It will help to walk through a small example of how one would build an algorithm with RAFT’s communications layer.
 
-First, an instance of `raft::comms_t` is passed through the `raft::device_resources` instance and code is written to utilize collective and/or point-to-point communications as needed.
+First, an instance of `raft::comms_t` is passed through the `raft::resources` instance and code is written to utilize collective and/or point-to-point communications as needed.
 
 .. code-block:: cpp
    :caption: Example function written with the RAFT comms API
@@ -14,16 +14,16 @@ First, an instance of `raft::comms_t` is passed through the `raft::device_resour
    #include <raft/core/device_mdspan.hpp>
    #include <raft/util/cudart_utils.hpp>
 
-   void test_allreduce(raft::device_resources const &handle, int root) {
-     raft::comms::comms_t const& communicator = handle.get_comms();
-     cudaStream_t stream = handle.get_stream();
+   void test_allreduce(raft::resources const &handle, int root) {
+     raft::comms::comms_t const& communicator = resource::get_comms(handle);
+     cudaStream_t stream = resource::get_cuda_stream(handle);
      raft::device_scalar<int> temp_scalar(stream);
 
      int to_send = 1;
      raft::copy(temp_scalar.data(), &to_send, 1, stream);
      communicator.allreduce(temp_scalar.data(), temp_scalar.data(), 1,
                             raft::comms::opt_t::SUM, stream);
-     handle.sync_stream();
+     resource::sync_stream(handle);
    }
 
 This exact function can now be executed in several different types of GPU clusters. For example, it can be executed with MPI by initializing an instance of `raft::comms::mpi_comms` with the `MPI_Comm`:
@@ -32,9 +32,9 @@ This exact function can now be executed in several different types of GPU cluste
    :caption: Example of running test_allreduce() in MPI
 
    #include <raft/core/mpi_comms.hpp>
-   #include <raft/core/device_resources.hpp>
+   #include <raft/core/resources.hpp>
 
-   raft::device_resources resource_handle;
+   raft::resources resource_handle;
    // ...
    // initialize MPI_Comm
    // ...
diff --git a/docs/source/using_libraft.md b/docs/source/using_libraft.md
index ef055184e7..70a17e289b 100644
--- a/docs/source/using_libraft.md
+++ b/docs/source/using_libraft.md
@@ -31,7 +31,7 @@ To verify that you are not accidentally instantiating templates that have not be
 #endif
 
 #include <cstdint>
-#include <raft/core/device_resources.hpp>
+#include <raft/core/resources.hpp>
 #include <raft/distance/distance.cuh>
 
 int main()

From a1d1fd68b7c77d45ef476cdc6c5c1c465d9b8c46 Mon Sep 17 00:00:00 2001
From: Mahesh Doijade <36705640+mdoijade@users.noreply.github.com>
Date: Wed, 17 May 2023 03:34:50 +0530
Subject: [PATCH 56/78] Fused L2 1-NN based on cutlass 3xTF32 / DMMA (#1118)

-- 3xTF32 & DMMA CUTLASS-based persistent FusedL2NN kernel, loosely based on grouped GEMM but customized for a single problem size.
-- As the value of `k` increases, the performance benefit of this implementation grows:
for k == 64 up to 1.3x, for k == 128 up to 1.53x, and for k == 256 up to 1.67x.
-- For all sizes of `k` this kernel outperforms the previous implementation.
-- Attaching the results of the FusedL2NN benchmark comparing the previous implementation with this CUTLASS version.

Authors:
  - Mahesh Doijade (https://github.com/mdoijade)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1118
---
 cpp/cmake/thirdparty/get_cutlass.cmake        |   4 +-
 .../custom_epilogue_with_broadcast.h          | 671 ++++++++++++++++++
 .../detail/fused_distance_nn/cutlass_base.cuh | 161 +++++
 .../detail/fused_distance_nn/epilogue.cuh     | 136 ++++
 .../epilogue_elementwise.cuh                  | 216 ++++++
 .../distance/detail/fused_distance_nn/gemm.h  | 410 +++++++++++
 .../fused_distance_nn/persistent_gemm.h       | 515 ++++++++++++++
 .../predicated_tile_iterator_normvec_smem.h   | 448 ++++++++++++
 .../predicated_tile_iterator_reduced_vec.h    | 626 ++++++++++++++++
 .../raft/distance/detail/fused_l2_nn.cuh      | 110 ++-
 .../detail/pairwise_distance_cutlass_base.cuh |  23 +-
 .../detail/predicated_tile_iterator_normvec.h |  14 +-
 .../neighbors/detail/connect_components.cuh   |  20 +-
 cpp/include/raft/util/cutlass_utils.cuh       |  53 ++
 cpp/test/distance/fused_l2_nn.cu              |   6 +-
 15 files changed, 3369 insertions(+), 44 deletions(-)
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/cutlass_base.cuh
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/epilogue.cuh
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/epilogue_elementwise.cuh
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/gemm.h
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/persistent_gemm.h
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem.h
 create mode 100644 cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec.h
 create mode 100644 cpp/include/raft/util/cutlass_utils.cuh

diff --git a/cpp/cmake/thirdparty/get_cutlass.cmake b/cpp/cmake/thirdparty/get_cutlass.cmake
index cb809de445..853fd7c52f 100644
--- a/cpp/cmake/thirdparty/get_cutlass.cmake
+++ b/cpp/cmake/thirdparty/get_cutlass.cmake
@@ -78,7 +78,7 @@ function(find_and_configure_cutlass)
 endfunction()
 
 if(NOT RAFT_CUTLASS_GIT_TAG)
-  set(RAFT_CUTLASS_GIT_TAG v2.9.1)
+  set(RAFT_CUTLASS_GIT_TAG v2.10.0)
 endif()
 
 if(NOT RAFT_CUTLASS_GIT_REPOSITORY)
@@ -86,5 +86,5 @@ if(NOT RAFT_CUTLASS_GIT_REPOSITORY)
 endif()
 
 find_and_configure_cutlass(
-  VERSION 2.9.1 REPOSITORY ${RAFT_CUTLASS_GIT_REPOSITORY} PINNED_TAG ${RAFT_CUTLASS_GIT_TAG}
+  VERSION 2.10.0 REPOSITORY ${RAFT_CUTLASS_GIT_REPOSITORY} PINNED_TAG ${RAFT_CUTLASS_GIT_TAG}
 )
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h
new file mode 100644
index 0000000000..10827a8778
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h
@@ -0,0 +1,671 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*! \file
+
+  \brief Epilogue for threadblock scoped GEMMs using Tensor Ops.
+
+  The epilogue rearranges the result of a matrix product through shared memory to match canonical
+  tensor layouts in global memory. Epilogues support conversion and reduction operations.
+
+This file contains a customized version of EpilogueWithBroadcast from CUTLASS 2.9.1
+(https://github.com/NVIDIA/cutlass/blob/v2.9.1/include/cutlass/epilogue/threadblock/epilogue_with_broadcast.h)
+
+Changes:
+- customized the compute_source_needed_() and apply_output_operator_() to suit the needs of per row
+reduction
+*/
+
+#pragma once
+
+#if defined(__CUDACC_RTC__)
+#include <cuda/std/cassert>
+#include <cuda/std/utility>
+#else
+#include <assert.h>
+#include <utility>
+#endif
+
+#include <cutlass/aligned_buffer.h>
+#include <cutlass/array.h>
+#include <cutlass/cutlass.h>
+#include <cutlass/fast_math.h>
+#include <cutlass/functional.h>
+#include <cutlass/layout/tensor.h>
+#include <cutlass/layout/vector.h>
+#include <cutlass/numeric_conversion.h>
+#include <cutlass/numeric_types.h>
+#include <cutlass/tensor_coord.h>
+
+#include <cutlass/gemm/gemm.h>
+
+#include <cutlass/transform/pitch_linear_thread_map.h>
+#include <cutlass/transform/threadblock/regular_tile_iterator.h>
+
+#include <cutlass/epilogue/threadblock/epilogue_base.h>
+#include <cutlass/epilogue/threadblock/predicated_tile_iterator.h>
+
+#include <cutlass/numeric_types.h>
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace epilogue {
+namespace threadblock {
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// This base class is meant to define the concept required of the
+/// EpilogueWithBroadcast::OutputOp. Its call operators and set_k_partition() have empty bodies.
+template <typename ElementC_,
+          typename ElementAccumulator_,
+          typename ElementCompute_,
+          typename ElementZ_,
+          typename ElementT_,
+          int ElementsPerAccess,
+          bool StoreZ = true,
+          bool StoreT = true>
+struct EpilogueWithBroadcastOpBaseCustom {
+  using ElementOutput                 = ElementC_;
+  using ElementAccumulator            = ElementAccumulator_;
+  using ElementCompute                = ElementCompute_;
+  using ElementZ                      = ElementZ_;
+  using ElementT                      = ElementT_;
+  static int const kElementsPerAccess = ElementsPerAccess;
+
+  using FragmentAccumulator = Array<ElementAccumulator, kElementsPerAccess>;
+  using FragmentCompute     = Array<ElementCompute, kElementsPerAccess>;
+  using FragmentC           = Array<ElementOutput, kElementsPerAccess>;
+  using FragmentZ           = Array<ElementZ, kElementsPerAccess>;
+  using FragmentT           = Array<ElementT, kElementsPerAccess>;
+
+  /// If true, the 'Z' tensor is stored
+  static bool const kStoreZ = StoreZ;
+
+  /// If true, the 'T' tensor is stored
+  static bool const kStoreT = StoreT;
+
+  /// Parameters structure - required
+  struct Params {};
+
+  //
+  // Methods
+  //
+
+  /// Constructor from Params (the argument is not used by this base class)
+  EpilogueWithBroadcastOpBaseCustom(Params const& params_) {}
+
+  /// Determine if the source is needed. This base implementation always returns true.
+  bool is_source_needed() const { return true; }
+
+  CUTLASS_HOST_DEVICE
+  void set_k_partition(int k_partition, int k_partition_count) {}
+
+  /// Applies the operation when is_source_needed() is true (empty body in this base class)
+  CUTLASS_HOST_DEVICE
+  void operator()(FragmentZ& frag_Z,
+                  FragmentT& frag_T,
+                  FragmentAccumulator const& AB,
+                  FragmentC const& frag_C,
+                  FragmentCompute const& V) const
+  {
+  }
+
+  /// Applies the operation when is_source_needed() is false (empty body in this base class)
+  CUTLASS_HOST_DEVICE
+  void operator()(FragmentZ& frag_Z,
+                  FragmentT& frag_T,
+                  FragmentAccumulator const& AB,
+                  FragmentCompute const& V) const
+  {
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Epilogue operator with bias vector broadcast over columns.
+///
+/// Computes the following:
+///
+///
+///  Z, T = OutputOp(AB, C, Broadcast)
+///
+///  if (ElementwiseOp::kStoreZ) {
+///    store(converted_u);
+///  }
+///
+///  if (ElementwiseOp::kStoreT) {
+///    store(v);
+///  }
+///
+template <
+  typename Shape_,               ///< Shape of threadblock tile (concept: GemmShape)
+  typename WarpMmaOperator_,     ///< Warp-level MMA operator (concept: gemm::warp::MmaTensorOp)
+  int PartitionsK,               ///< Number of partitions of the K dimension
+  typename OutputTileIterator_,  ///< Tile iterator reading and writing output tensors (z)
+  typename TensorTileIterator_,  ///< Additional tile iterator for tensor-valued operands (t)
+  typename ElementVector_,       ///< Pointer to broadcast vector
+  typename AccumulatorFragmentIterator_,  ///< Fragment iterator selecting accumulators
+  typename WarpTileIterator_,    ///< Warp-scoped tile iterator writing accumulators to SMEM
+  typename SharedLoadIterator_,  ///< Threadblock-scoped tile iterator loading from SMEM
+  typename OutputOp_,            ///< Output operator - concept is EpilogueWithBroadcastOp
+  typename Padding_,  ///< Padding added to SMEM allocation to avoid bank conflicts (concept:
+                      ///< MatrixShape)
+  int FragmentsPerPartition = 1,  ///< Used to coarsen the epilogue granularity
+  int IterationsUnroll      =     ///< Used to reduce binary size when epilogue op is large
+  (!IsEpilogueFunctorHeavy<OutputOp_>::value)>
+class EpilogueWithBroadcastCustom : public EpilogueBase<Shape_,
+                                                        typename WarpMmaOperator_::Shape,
+                                                        PartitionsK,
+                                                        AccumulatorFragmentIterator_,
+                                                        WarpTileIterator_,
+                                                        Padding_,
+                                                        FragmentsPerPartition> {
+ public:
+  using Base = EpilogueBase<Shape_,
+                            typename WarpMmaOperator_::Shape,
+                            PartitionsK,
+                            AccumulatorFragmentIterator_,
+                            WarpTileIterator_,
+                            Padding_,
+                            FragmentsPerPartition>;
+
+  using Shape                       = Shape_;
+  using WarpMmaOperator             = WarpMmaOperator_;
+  static int const kPartitionsK     = PartitionsK;
+  using OutputTileIterator          = OutputTileIterator_;
+  using TensorTileIterator          = TensorTileIterator_;
+  using ElementVector               = ElementVector_;
+  using AccumulatorFragmentIterator = AccumulatorFragmentIterator_;
+  using WarpTileIterator            = WarpTileIterator_;
+  using SharedLoadIterator          = SharedLoadIterator_;
+  using OutputOp                    = OutputOp_;
+  using Padding                     = Padding_;
+
+  using Layout    = layout::RowMajor;
+  using LongIndex = typename Layout::LongIndex;
+
+  /// The complete warp-level accumulator tile
+  using AccumulatorTile = typename Base::AccumulatorTile;
+
+  /// Accumulator element
+  using ElementAccumulator = typename WarpTileIterator::Element;
+
+  /// Compute data type produced by the output op
+  using ElementCompute = typename OutputOp::ElementCompute;
+
+  /// Compute fragment
+  using FragmentCompute = Array<ElementCompute, OutputTileIterator::Fragment::kElements>;
+
+  /// Thread map used by output tile iterators
+  using ThreadMap = typename OutputTileIterator::ThreadMap;
+
+  /// Fragment object used to store the broadcast values
+  using BroadcastFragment =
+    Array<ElementCompute, ThreadMap::Iterations::kColumn * ThreadMap::kElementsPerAccess>;
+
+  /// Output element
+  using ElementOutput = typename OutputTileIterator::Element;
+
+  /// Data type of additional tensor
+  using ElementTensor = typename TensorTileIterator::Element;
+
+  /// Output access size
+  static int const kElementsPerAccess = OutputTileIterator::kElementsPerAccess;
+
+  /// Tensor reference to destination tensor
+  using TensorRef = typename OutputTileIterator::TensorRef;
+
+  /// Tensor reference to sync tensor
+  using SyncTensorRef = typename cutlass::TensorRef<int, cutlass::layout::PackedVectorLayout>;
+
+  /// Const tensor reference to source tensor
+  using ConstTensorRef = typename OutputTileIterator::ConstTensorRef;
+
+  /// Array type used to output
+  using OutputAccessType =
+    Array<typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess>;
+
+  /// Array type used by output functor
+  using AccumulatorAccessType =
+    Array<typename WarpTileIterator::Element, OutputTileIterator::kElementsPerAccess>;
+
+  /// Array type used by output functor
+  using ComputeAccessType = Array<ElementCompute, OutputTileIterator::kElementsPerAccess>;
+
+  /// Tensor access type
+  using TensorAccessType = Array<ElementTensor, OutputTileIterator::kElementsPerAccess>;
+
+  /// Number of warps
+  using WarpCount = typename Base::WarpCount;
+
+  /// Shared memory allocation from epilogue base class
+  using BaseSharedStorage = typename Base::SharedStorage;
+
+  static int constexpr kSmemTiles =
+    Base::kFragmentsPerIteration > 1 ? Base::kFragmentsPerIteration : kPartitionsK;
+  static int constexpr kSmemPointerOffset = Base::SharedStorage::StorageShape::kCount / kSmemTiles;
+
+  /// Compile-time constants derived from ThreadMap, Shape and WarpCount, used for the broadcast
+  struct BroadcastDetail {
+    /// Number of threads per warp
+    static int const kWarpSize = 32;
+
+    static int const kElementsPerAccess = ThreadMap::kElementsPerAccess;
+
+    /// Number of distinct scalar column indices handled by each thread
+    static int const kColumnsPerThread =
+      ThreadMap::Iterations::kColumn * ThreadMap::kElementsPerAccess;
+
+    /// Number of distinct scalar row indices handled by each thread
+    static int const kRowsPerThread =
+      ThreadMap::Iterations::kCount / ThreadMap::Iterations::kColumn;
+
+    /// Number of threads per threadblock
+    static int const kThreadCount = kWarpSize * WarpCount::kCount;
+
+    /// Number of distinct threads per row of output tile
+    static int const kThreadsPerRow = (Shape::kN / kColumnsPerThread);
+
+    /// Number of distinct threads which must be reduced during the final reduction phase within the
+    /// threadblock.
+    static int const kThreadRows = kThreadCount / kThreadsPerRow;
+
+    /// Ceiling of Shape::kN / kThreadCount, clamped to a minimum of 1
+    static int const kThreadAccessesPerRow =
+      const_max(1, (Shape::kN + kThreadCount - 1) / kThreadCount);
+
+    /// Shape of the shared memory allocation for the epilogue
+    using StorageShape = MatrixShape<kThreadRows, Shape::kN>;
+
+    /// Debug printing (the body is compiled out by `#if 0`, so this is currently a no-op)
+    CUTLASS_DEVICE
+    static void print()
+    {
+#if 0
+      printf("BroadcastDetail {\n");
+      printf(
+        "  kColumnsPerThread: %d\nkRowsPerThread: %d\n,kThreadCount: %d\nkThreadsPerRow: %d\n"
+        "kThreadRows: %d\nThreadAccessesPerRow: %d\nStorageShape: %d x %d (count: %d)\n",
+        kColumnsPerThread,
+        kRowsPerThread,
+        kThreadCount,
+        kThreadsPerRow,
+        kThreadRows,
+        kThreadAccessesPerRow,
+        StorageShape::kRow,
+        StorageShape::kColumn,
+        StorageShape::kCount
+      );
+      printf("};\n");
+#endif
+    }
+  };
+
+  /// Shared storage structure (shadows base); its union holds only the base epilogue storage
+  struct SharedStorage {
+    union {
+      BaseSharedStorage base;
+    };
+
+    CUTLASS_HOST_DEVICE
+    SharedStorage() {}
+  };
+
+ public:
+  static_assert(SharedLoadIterator::Fragment::kElements == TensorTileIterator::Fragment::kElements,
+                "Mismatch between shared load iterator and output tile iterator.");
+
+  static_assert(OutputTileIterator::kElementsPerAccess,
+                "OutputTileIterator::kElementsPerAccess must not be zero.");
+
+  static_assert(!(OutputTileIterator::Fragment::kElements % OutputTileIterator::kElementsPerAccess),
+                "Divisibility");
+
+ private:
+  /// Loads fragment from shared memory aligned with output tensor
+  SharedLoadIterator shared_load_iterator_;
+
+  /// Thread index within the threadblock
+  int thread_idx_;
+
+ public:
+  /// Constructor: initializes the base epilogue, the shared-memory load iterator and thread_idx_
+  CUTLASS_DEVICE
+  EpilogueWithBroadcastCustom(SharedStorage& shared_storage,  ///< Shared storage object
+                              int thread_idx,  ///< ID of a thread within the threadblock
+                              int warp_idx,    ///< ID of warp within threadblock
+                              int lane_idx     ///< ID of thread within warp
+                              )
+    : Base(shared_storage.base, thread_idx, warp_idx, lane_idx),
+      shared_load_iterator_(shared_storage.base.reference(), thread_idx),
+      thread_idx_(thread_idx)
+  {
+  }
+
+  /// Epilogue entry point: loads the broadcast fragment, then calls compute_source_needed_()
+  CUTLASS_DEVICE
+  void operator()(
+    OutputOp const& output_op,            ///< Output operator
+    ElementVector const* broadcast_ptr,   ///< Broadcast vector (null leaves the fragment zeroed)
+    AccumulatorTile const& accumulators,  ///< Complete warp-level accumulator tile
+    OutputTileIterator source_iterator,   ///< Tile iterator for source accumulator matrix
+    TensorTileIterator
+      tensor_iterator,  ///< Threadblock tile iterator for additional tensor operand
+    MatrixCoord const&
+      problem_size =    ///< Problem size needed to guard against out-of-bounds accesses
+    MatrixCoord(Shape::kM, Shape::kN),
+    MatrixCoord const&
+      threadblock_offset =  ///< Threadblock's initial offset within the problem size space
+    MatrixCoord())
+  {
+    BroadcastFragment broadcast_fragment;
+
+    load_broadcast_fragment_(broadcast_fragment, broadcast_ptr, problem_size, threadblock_offset);
+
+    compute_source_needed_(
+      output_op, broadcast_fragment, accumulators, source_iterator, tensor_iterator);
+  }
+
+ private:
+  CUTLASS_DEVICE  // Loads and converts this thread's columns of the broadcast vector
+  void load_broadcast_fragment_(
+    BroadcastFragment&
+      broadcast_fragment,  ///< [out] Cleared, then filled with the converted broadcast values
+    ElementVector const* broadcast_ptr,  ///< Broadcast vector
+    MatrixCoord const&
+      problem_size,       ///< Problem size needed to guard against out-of-bounds accesses
+    MatrixCoord const&
+      threadblock_offset  ///< Threadblock's initial offset within the problem size space
+  )
+  {
+    broadcast_fragment.clear();
+
+    // If no pointer is supplied, leave the fragment cleared and avoid memory accesses
+    if (!broadcast_ptr) { return; }
+
+    int thread_initial_column = ThreadMap::initial_offset(thread_idx_).column();
+
+    int thread_column_idx = threadblock_offset.column() + thread_initial_column;
+    broadcast_ptr += thread_initial_column;  // advance to this thread's first column
+
+    NumericArrayConverter<ElementCompute, ElementVector, BroadcastDetail::kElementsPerAccess>
+      converter;
+    using AccessType          = AlignedArray<ElementVector, BroadcastDetail::kElementsPerAccess>;
+    using ComputeFragmentType = Array<ElementCompute, BroadcastDetail::kElementsPerAccess>;
+
+    ComputeFragmentType* frag_ptr = reinterpret_cast<ComputeFragmentType*>(&broadcast_fragment);
+
+    CUTLASS_PRAGMA_UNROLL
+    for (int j = 0; j < ThreadMap::Iterations::kColumn; ++j) {
+      AccessType loaded;
+
+      loaded.clear();  // stays cleared when the column is out of bounds
+
+      if (thread_column_idx < problem_size.column()) {
+        loaded = *reinterpret_cast<AccessType const*>(broadcast_ptr);
+      }
+
+      ComputeFragmentType cvt = converter(loaded);
+      frag_ptr[j]             = cvt;
+
+      thread_column_idx += ThreadMap::Delta::kColumn;  // step to the next column iteration
+      broadcast_ptr += ThreadMap::Delta::kColumn;
+    }
+  }
+
+  template <class Seq>
+  struct acc2smem_source_not_needed;  // primary template; only the index_sequence form is defined
+
+  template <size_t... Seq>
+  struct acc2smem_source_not_needed<cutlass::index_sequence<Seq...>> {
+    template <int Advance>
+    CUTLASS_DEVICE static void helper(AccumulatorFragmentIterator accum_fragment_iterator,
+                                      WarpTileIterator& warp_tile_iterator)
+    {
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < Advance; i++) {  // iterator is a by-value copy; skip Advance fragments
+        ++accum_fragment_iterator;
+      }
+
+      CUTLASS_PRAGMA_UNROLL
+      for (int p = 0; p < Base::kFragmentsPerIteration; ++p) {
+        typename AccumulatorFragmentIterator::Fragment accum_fragment;
+
+        accum_fragment_iterator.load(accum_fragment);
+        ++accum_fragment_iterator;
+
+        warp_tile_iterator.store(accum_fragment);
+        if (p < Base::kFragmentsPerIteration - 1) {  // no offset after the last fragment
+          warp_tile_iterator.add_pointer_offset(kSmemPointerOffset);
+        }
+      }
+
+      if (Base::kFragmentsPerIteration > 1) {  // undo the offsets accumulated in the loop above
+        warp_tile_iterator.add_pointer_offset(kSmemPointerOffset *
+                                              (1 - Base::kFragmentsPerIteration));
+      }
+    }
+
+    CUTLASS_DEVICE
+    static void push(size_t pos,  // runs helper<> only for the entry whose index equals pos
+                     AccumulatorFragmentIterator const& iterator_begin,
+                     WarpTileIterator& warp_tile_iterator)
+    {
+      int dummy[] = {
+        (pos == (Seq * Base::kFragmentsPerIteration)) &&
+        (helper<Seq * Base::kFragmentsPerIteration>(iterator_begin, warp_tile_iterator), 0)...};
+
+      CUTLASS_UNUSED(dummy[0]);
+    }
+  };
+
+  /// No-op stub: the body is empty (operator() always calls compute_source_needed_ instead)
+  CUTLASS_DEVICE
+  void compute_source_not_needed_(
+    OutputOp const& output_op,  ///< Output operator
+    BroadcastFragment const&
+      broadcast_fragment,  ///< Fragment containing the accumulated partial reduction over columns
+    OutputTileIterator destination_iterator,  ///< Tile iterator for destination
+    AccumulatorTile const& accumulators,      ///< Complete warp-level accumulator tile
+    TensorTileIterator tensor_iterator  ///< Threadblock tile iterator for additional tensor operand
+  )
+  {
+  }
+
+  template <class Seq>
+  struct acc2smem_source_needed;  // primary template; only the index_sequence form is defined
+
+  template <size_t... Seq>
+  struct acc2smem_source_needed<cutlass::index_sequence<Seq...>> {
+    template <int Advance>
+    CUTLASS_DEVICE static void helper(AccumulatorFragmentIterator accum_fragment_iterator,
+                                      WarpTileIterator& warp_tile_iterator)
+    {
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < Advance; i++) {  // iterator is a by-value copy; skip Advance fragments
+        ++accum_fragment_iterator;
+      }
+
+      typename AccumulatorFragmentIterator::Fragment accum_fragment;
+      accum_fragment_iterator.load(accum_fragment);
+      warp_tile_iterator.store(accum_fragment);
+    }
+
+    CUTLASS_DEVICE static void push(size_t pos,  // runs helper<Seq> only for the Seq equal to pos
+                                    AccumulatorFragmentIterator const& iterator_begin,
+                                    WarpTileIterator& warp_tile_iterator)
+    {
+      int dummy[] = {(pos == Seq) && (helper<Seq>(iterator_begin, warp_tile_iterator), 0)...};
+      CUTLASS_UNUSED(dummy[0]);  // dummy only hosts the pack expansion; silence unused warnings
+    }
+  };
+
+  /// Per-iteration epilogue: accumulators -> shared memory -> output_op -> tensor_iterator store
+  CUTLASS_DEVICE
+  void compute_source_needed_(
+    OutputOp const& output_op,  ///< Output operator
+    BroadcastFragment const&
+      broadcast_fragment,  ///< Per-thread broadcast values forwarded to output_op
+    AccumulatorTile const& accumulators,  ///< Complete warp-level accumulator tile
+    OutputTileIterator
+      source_iterator,  ///< Tile iterator the source fragment is loaded from each iteration
+    TensorTileIterator tensor_iterator  ///< Threadblock tile iterator for additional tensor operand
+  )
+  {
+    typename OutputTileIterator::Fragment source_fragment;
+    source_fragment.clear();
+
+    //
+    // Iterator over warp-level accumulator fragment
+    //
+
+    AccumulatorFragmentIterator accum_fragment_iterator(accumulators);
+
+    //
+    // Iterate over accumulator tile
+    //
+
+#pragma unroll(IterationsUnroll ? OutputTileIterator::kIterations : 1)
+    for (int iter = 0; iter < OutputTileIterator::kIterations; ++iter) {
+      //
+      // Convert and store fragment
+      //
+
+      // NOTE(review): `__syncthreads()` is disabled at this point; confirm smem reuse is safe.
+
+      acc2smem_source_needed<cutlass::make_index_sequence<OutputTileIterator::kIterations>>::push(
+        iter, accum_fragment_iterator, this->warp_tile_iterator_);
+
+      __syncthreads();
+
+      //
+      // Load fragments from shared memory
+      //
+
+      typename SharedLoadIterator::Fragment aligned_accum_fragment[kPartitionsK];  // only [0] used
+
+      shared_load_iterator_.load(aligned_accum_fragment[0]);
+
+      //
+      // Fragment that receives the output operation's result
+      //
+
+      typename TensorTileIterator::Fragment frag_T;
+
+      //
+      // Load the source
+      //
+
+      source_iterator.load(source_fragment);
+      ++source_iterator;
+
+      apply_output_operator_(
+        frag_T, output_op, aligned_accum_fragment[0], source_fragment, broadcast_fragment);
+
+      //
+      // Conditionally store fragments
+      //
+      if (OutputOp::kStoreT) {
+        tensor_iterator.store(frag_T);
+        ++tensor_iterator;
+      }
+    }
+  }
+
+  /// Invokes output_op once per access-sized vector, writing each result into frag_T
+  CUTLASS_DEVICE
+  void apply_output_operator_(typename TensorTileIterator::Fragment& frag_T,
+                              OutputOp const& output_op,
+                              typename SharedLoadIterator::Fragment const& frag_AB,
+                              typename OutputTileIterator::Fragment const& frag_C,
+                              BroadcastFragment const& frag_Broadcast)
+  {
+    using AccessTypeT         = Array<typename TensorTileIterator::OutValT, kElementsPerAccess>;
+    using AccessTypeBroadcast = Array<ElementCompute, kElementsPerAccess>;
+
+    AccessTypeT* frag_T_ptr = reinterpret_cast<AccessTypeT*>(&frag_T);
+
+    AccumulatorAccessType const* frag_AB_ptr =
+      reinterpret_cast<AccumulatorAccessType const*>(&frag_AB);
+
+    OutputAccessType const* frag_C_ptr = reinterpret_cast<OutputAccessType const*>(&frag_C);
+
+    AccessTypeBroadcast const* frag_Broadcast_ptr =
+      reinterpret_cast<AccessTypeBroadcast const*>(&frag_Broadcast);
+
+    int const kOutputOpIterations =
+      TensorTileIterator::Fragment::kElements / TensorTileIterator::kElementsPerAccess;
+
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kOutputOpIterations; ++i) {
+      output_op(frag_T_ptr[i],
+                frag_AB_ptr[i],
+                frag_C_ptr[(i / ThreadMap::Iterations::kColumn)],  // same C vector for kColumn i's
+                frag_Broadcast_ptr[i % ThreadMap::Iterations::kColumn]);  // cycles every kColumn
+    }
+  }
+
+  /// No-op stub: the body is empty, so nothing is written to frag_Z or frag_T
+  CUTLASS_DEVICE
+  void apply_output_operator_source_not_needed_(
+    typename OutputTileIterator::Fragment& frag_Z,
+    typename TensorTileIterator::Fragment& frag_T,
+    OutputOp const& output_op,
+    typename SharedLoadIterator::Fragment const& frag_AB,
+    BroadcastFragment const& frag_Broadcast)
+  {
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace threadblock
+}  // namespace epilogue
+}  // namespace cutlass
+
+////////////////////////////////////////////////////////////////////////////////
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/cutlass_base.cuh b/cpp/include/raft/distance/detail/fused_distance_nn/cutlass_base.cuh
new file mode 100644
index 0000000000..a1cf1a9b17
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/cutlass_base.cuh
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wtautological-compare"
+
+// If CUTLASS_NAMESPACE is not defined (e.g. RAFT cmake is not used),
+// remap the `cutlass` identifier to `raft_cutlass`
+#ifndef CUTLASS_NAMESPACE
+#define cutlass raft_cutlass
+#endif
+
+#include <cutlass/cutlass.h>
+#include <cutlass/gemm/device/gemm.h>
+#include <cutlass/gemm/device/gemm_grouped.h>
+#include <cutlass/gemm/device/gemm_universal_adapter.h>
+#include <rmm/device_uvector.hpp>
+
+#include <cutlass/layout/matrix.h>
+#include <cutlass/layout/tensor.h>
+#include <cutlass/matrix_coord.h>
+#include <cutlass/tensor_view.h>
+
+#include <raft/distance/detail/fused_distance_nn/epilogue_elementwise.cuh>  // FusedDistanceNNEpilogueElementwise
+#include <raft/distance/detail/fused_distance_nn/gemm.h>                    // FusedDistanceNNGemm
+#include <raft/util/cudart_utils.hpp>   // getMultiProcessorCount
+#include <raft/util/cutlass_utils.cuh>  // RAFT_CUTLASS_TRY
+
+namespace raft {
+namespace distance {
+namespace detail {
+/// Builds the FusedDistanceNN grouped-GEMM arguments and launches the CUTLASS kernel on `stream`.
+template <typename DataT,
+          typename AccT,
+          typename OutT,
+          typename IdxT,
+          int VecLen,
+          typename CGReduceOpT,
+          typename DistanceFn,
+          typename ReduceOpT,
+          typename KVPReduceOpT>
+void cutlassFusedDistanceNN(const DataT* x,   // first GEMM operand (stride lda)
+                            const DataT* y,   // second GEMM operand (stride ldb)
+                            const DataT* xn,  // passed in the C-operand slot ("A norm")
+                            const DataT* yn,  // passed as the broadcast vector
+                            IdxT m,
+                            IdxT n,
+                            IdxT k,
+                            IdxT lda,
+                            IdxT ldb,
+                            IdxT ldd,
+                            OutT* dOutput,
+                            int* mutexes,
+                            CGReduceOpT cg_reduce_op,
+                            DistanceFn dist_op,
+                            ReduceOpT redOp,
+                            KVPReduceOpT pairRedOp,
+                            cudaStream_t stream)
+{
+  using EpilogueOutputOp = cutlass::epilogue::thread::FusedDistanceNNEpilogueElementwise<
+    DataT,  // ElementC_
+    AccT,   // ElementAccumulator_
+    DataT,  // ElementCompute_
+    AccT,   // ElementZ_
+    OutT,   // ElementT_
+    // 128 / cutlass::sizeof_bits<DataT>::value,
+    1,  // Elements per access 1
+    DistanceFn,
+    CGReduceOpT,
+    ReduceOpT,
+    KVPReduceOpT>;
+  constexpr int batch_count = 1;
+
+  typename EpilogueOutputOp::Params epilog_op_param(
+    dist_op, cg_reduce_op, redOp, pairRedOp, mutexes);
+
+  // Number of pipeline stages
+  constexpr int NumStages = 3;
+  // Alignment
+  constexpr int Alignment = VecLen;
+
+  // default initialize problem size with row major inputs
+  auto problem_size = cutlass::gemm::GemmCoord(m, n, k);
+
+  constexpr bool isRowMajor = true;
+
+  using fusedDistanceNNKernel =
+    typename cutlass::gemm::kernel::FusedDistanceNNGemm<DataT,
+                                                        Alignment,
+                                                        DataT,
+                                                        Alignment,
+                                                        AccT,
+                                                        AccT,
+                                                        EpilogueOutputOp,
+                                                        NumStages,  // Number of pipeline stages
+                                                        isRowMajor>::GemmKernel;
+
+  using fusedDistanceNN = cutlass::gemm::device::GemmGrouped<fusedDistanceNNKernel>;
+
+  int num_blocks_per_sm   = fusedDistanceNN::maximum_active_blocks();
+  int num_sms             = raft::getMultiProcessorCount();
+  int full_wave           = num_blocks_per_sm * num_sms;
+  constexpr int mmaShapeM = fusedDistanceNNKernel::Mma::Shape::kM;
+  constexpr int mmaShapeN = fusedDistanceNNKernel::Mma::Shape::kN;
+  int columnTiles         = (problem_size.n() - 1 + mmaShapeN) / mmaShapeN;
+  int rowTiles            = (problem_size.m() - 1 + mmaShapeM) / mmaShapeM;
+  int totalTiles          = columnTiles * rowTiles;
+  int thread_blocks =  // min(totalTiles, full_wave), unless rowTiles alone reaches a full wave
+    rowTiles < full_wave ? (totalTiles < full_wave ? totalTiles : full_wave) : rowTiles;
+
+  typename fusedDistanceNN::Arguments arguments{
+    problem_size,
+    batch_count,  // num of problems.
+    thread_blocks,
+    epilog_op_param,
+    x,
+    y,
+    xn,                         // C matrix eq vector param, which here is A norm
+    const_cast<DataT*>(yn),     // this is broadcast vec, which is required to be non-const param
+    dOutput,                    // Output distance matrix
+    static_cast<int64_t>(lda),  // stride A
+    static_cast<int64_t>(ldb),  // stride B
+    static_cast<int64_t>(1),    // stride A norm
+    static_cast<int64_t>(ldd)   // stride Output matrix
+  };
+
+  // Using the arguments, query for extra workspace required for matrix multiplication computation
+  size_t workspace_size = fusedDistanceNN::get_workspace_size(arguments);
+  // Allocate workspace memory
+  rmm::device_uvector<uint8_t> workspace(workspace_size, stream);
+  // Instantiate CUTLASS kernel depending on templates
+  fusedDistanceNN fusedDistanceNN_op;
+  // Check the problem size is supported or not
+  RAFT_CUTLASS_TRY(fusedDistanceNN_op.can_implement(arguments));
+  // Initialize CUTLASS kernel with arguments and workspace pointer
+  RAFT_CUTLASS_TRY(fusedDistanceNN_op.initialize(arguments, workspace.data(), stream));
+  // Launch initialized CUTLASS kernel
+  RAFT_CUTLASS_TRY(fusedDistanceNN_op.run(stream));
+}
+
+};  // namespace detail
+};  // namespace distance
+};  // namespace raft
+
+#pragma GCC diagnostic pop
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/epilogue.cuh b/cpp/include/raft/distance/detail/fused_distance_nn/epilogue.cuh
new file mode 100644
index 0000000000..8a0bea3469
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/epilogue.cuh
@@ -0,0 +1,136 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*! \file
+  \brief Epilogue for threadblock scoped GEMMs using Tensor Ops.
+
+This is adapted from DefaultEpilogueWithBroadcastTensorOp from CUTLASS 2.9.0
+(https://github.com/NVIDIA/cutlass/blob/master/include/cutlass/epilogue/threadblock/default_epilogue_with_broadcast.h#L75)
+
+This epilogue allows us to load norm buffers using PredicatedTileIteratorNormVec
+and EpilogueWithBroadcast, used for L2/cosine distances, and applies a user-defined elementwise
+operation.
+-- A norm load is provided by PredicatedTileIteratorNormVec
+-- B norm load is provided by EpilogueWithBroadcast
+-- elementwise operation is provided by OutputOp
+*/
+
+#pragma once
+
+#include <cutlass/array.h>
+#include <cutlass/cutlass.h>
+#include <cutlass/numeric_types.h>
+
+#include <cutlass/gemm/gemm.h>
+
+#include <cutlass/epilogue/threadblock/default_epilogue_tensor_op.h>
+#include <cutlass/epilogue/threadblock/default_epilogue_volta_tensor_op.h>
+#include <cutlass/epilogue/threadblock/epilogue.h>
+#include <raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h>
+
+#include <raft/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem.h>
+#include <raft/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec.h>
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace epilogue {
+namespace threadblock {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Assembles the iterator types and the EpilogueWithBroadcastCustom type for TensorOp epilogues.
+template <typename Shape,
+          typename WarpMmaTensorOp,
+          int PartitionsK,
+          typename ElementOutput,
+          typename ElementTensor,
+          typename ElementVector,
+          typename OutputOp,
+          typename LayoutT,
+          int ElementsPerAccess,
+          bool ScatterD = false>  // ScatterD is not referenced inside this struct
+struct FusedDistanceNNEpilogue {
+  /// Use defaults related to the existing epilogue
+  using Base =
+    DefaultEpilogueTensorOp<Shape, WarpMmaTensorOp, PartitionsK, OutputOp, ElementsPerAccess>;
+
+  //
+  // Norm-vector tile iterator (named for row norms); forwarded to Epilogue below
+  //
+  using RowNormTileIterator = cutlass::epilogue::threadblock::
+    PredicatedTileIteratorNormVecSmem<typename Base::OutputTileThreadMap, ElementOutput, LayoutT>;
+
+  //
+  // Reduced-vector tile iterator, parameterized on OutputOp::Params; forwarded to Epilogue below
+  //
+  using OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIteratorReducedVec<
+    typename Base::OutputTileThreadMap,
+    ElementTensor,
+    LayoutT,
+    typename OutputOp::Params>;
+
+  /// Define the epilogue
+  using Epilogue = cutlass::epilogue::threadblock::EpilogueWithBroadcastCustom<
+    Shape,
+    WarpMmaTensorOp,
+    PartitionsK,
+    RowNormTileIterator,
+    OutputTileIterator,
+    ElementVector,
+    typename Base::AccumulatorFragmentIterator,
+    typename Base::WarpTileIterator,
+    typename Base::SharedLoadIterator,
+    OutputOp,
+    typename Base::Padding,
+    Base::kFragmentsPerIteration>;
+};
+
+}  // namespace threadblock
+}  // namespace epilogue
+}  // namespace cutlass
+
+////////////////////////////////////////////////////////////////////////////////
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/epilogue_elementwise.cuh b/cpp/include/raft/distance/detail/fused_distance_nn/epilogue_elementwise.cuh
new file mode 100644
index 0000000000..a21f3d60e0
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/epilogue_elementwise.cuh
@@ -0,0 +1,216 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+/*! \file
+  \brief Functor performing distance operations used by epilogues of pairwise distance
+  kernels.
+
+  This is adapted from LinearCombinationBiasElementwise from CUTLASS 2.9.0,
+  customized to apply an elementwise distance formula to the accumulated GEMM value
+  and a user-defined operation which can convert distance values to key-value pairs.
+*/
+
+#pragma once
+
+#include <cutlass/array.h>
+#include <cutlass/cutlass.h>
+#include <cutlass/functional.h>
+#include <cutlass/numeric_conversion.h>
+#include <cutlass/numeric_types.h>
+
+#include <cutlass/epilogue/thread/activation.h>
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace epilogue {
+namespace thread {
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Output functor for the FusedDistanceNN epilogue: applies DistanceOp_ elementwise to the
+/// source (C), broadcast (V) and accumulator (AB) fragments and writes the result to frag_T
+template <typename ElementC_,
+          typename ElementAccumulator_,
+          typename ElementCompute_,
+          typename ElementZ_,
+          typename ElementT_,
+          int ElementsPerAccess,
+          typename DistanceOp_,
+          typename CGReduceOp_,
+          typename ReduceOpT_,
+          typename KVPReduceOpT_>
+class FusedDistanceNNEpilogueElementwise {
+ public:
+  using ElementOutput                 = ElementC_;
+  using ElementC                      = ElementC_;
+  using ElementAccumulator            = ElementAccumulator_;
+  using ElementCompute                = ElementCompute_;
+  using ElementZ                      = ElementZ_;
+  using ElementT                      = ElementT_;
+  static int const kElementsPerAccess = ElementsPerAccess;
+  static int const kCount             = kElementsPerAccess;
+
+  using DistanceOp = DistanceOp_;
+  using CGReduceOp = CGReduceOp_;
+
+  using FragmentAccumulator = Array<ElementAccumulator, kElementsPerAccess>;
+  using FragmentCompute     = Array<ElementCompute, kElementsPerAccess>;
+  using FragmentC           = Array<ElementOutput, kElementsPerAccess>;
+  using FragmentZ           = Array<ElementZ, kElementsPerAccess>;
+  using OutValT             = typename CGReduceOp::AccTypeT;
+  using FragmentT           = Array<OutValT, kElementsPerAccess>;
+
+  using FragmentOutput = FragmentZ;
+
+  static bool const kIsHeavy = true;  // ElementwiseOp::kIsHeavy;
+
+  /// If true, the 'Z' tensor is stored
+  static bool const kStoreZ = false;  // We don't store anything in Z,
+
+  /// If true, the 'T' tensor is stored
+  static bool const kStoreT = true;  // this is our final output storage.
+
+  /// Host-constructable parameters structure
+  struct Params {
+    CGReduceOp_ cg_reduce_op;  // not read by this class's constructor
+    DistanceOp_ dist_op_;
+    KVPReduceOpT_ pair_redop_;
+    ReduceOpT_ red_op_;
+    int* mutexes_;  // not read by this class's constructor; left unset by Params()
+    using CGReduceT = CGReduceOp_;
+    //
+    // Methods
+    //
+    CUTLASS_HOST_DEVICE
+    Params(DistanceOp_ dist_op,
+           CGReduceOp cg_reduce_op,
+           ReduceOpT_ red_op,
+           KVPReduceOpT_ pair_redop,
+           int* mutexes)
+      : cg_reduce_op(cg_reduce_op),
+        dist_op_(dist_op),
+        pair_redop_(pair_redop),
+        red_op_(red_op),
+        mutexes_(mutexes)
+    {
+    }
+
+    CUTLASS_HOST_DEVICE
+    Params() {}
+  };
+
+ private:
+  //
+  // Data members
+  //
+  DistanceOp_ elementwise_op;
+  KVPReduceOpT_ pair_redop;  // stored but not used by any method of this class
+
+ public:
+  ReduceOpT_ red_op;  // public -- presumably read by other epilogue components; TODO confirm
+
+  //
+  // Methods
+  //
+
+  /// Constructor from Params (copies dist_op_, pair_redop_ and red_op_ only)
+  CUTLASS_HOST_DEVICE
+  FusedDistanceNNEpilogueElementwise(Params const& params)
+    : elementwise_op(params.dist_op_), pair_redop(params.pair_redop_), red_op(params.red_op_)
+  {
+  }
+
+  /// Returns true if source is needed
+  CUTLASS_HOST_DEVICE
+  bool is_source_needed() const
+  {
+    // we use for making sure C matrix is used for A mat norm.
+    return true;
+  }
+
+  /// Functionally required for serial reduction in the epilogue
+  CUTLASS_HOST_DEVICE
+  void set_k_partition(int k_partition, int k_partition_count) {}
+
+  /// Applies the operation when is_source_needed() is true
+  CUTLASS_HOST_DEVICE
+  void operator()(FragmentT& frag_T,
+                  FragmentAccumulator const& AB,
+                  FragmentC const& frag_C,
+                  FragmentCompute const& V) const
+  {
+    FragmentCompute tmp_Accum =
+      NumericArrayConverter<ElementCompute, ElementAccumulator, kElementsPerAccess>()(AB);
+    FragmentCompute tmp_C =
+      NumericArrayConverter<ElementCompute, ElementC, kElementsPerAccess>()(frag_C);
+    // frag_T[i] = elementwise_op(C[i], V[i], AB[i]) for each element of the access
+
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kElementsPerAccess; ++i) {
+      ElementCompute res_Z = elementwise_op(tmp_C[i], V[i], tmp_Accum[i]);
+      frag_T[i]            = res_Z;
+    }
+  }
+
+  /// Overload for is_source_needed() == false: empty body, nothing is written
+  CUTLASS_HOST_DEVICE
+  void operator()(FragmentZ& frag_Z,
+                  FragmentT& frag_T,
+                  FragmentAccumulator const& AB,
+                  FragmentCompute const& V) const
+  {
+  }
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace thread
+}  // namespace epilogue
+}  // namespace cutlass
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h b/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h
new file mode 100644
index 0000000000..3da8b3ee3d
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/gemm.h
@@ -0,0 +1,410 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cutlass/cutlass.h>
+
+#include <cutlass/gemm/kernel/default_gemm_universal.h>
+#include <cutlass/layout/matrix.h>
+#include <cutlass/layout/tensor.h>
+
+#include <raft/distance/detail/fused_distance_nn/epilogue.cuh>
+#include <raft/distance/detail/fused_distance_nn/persistent_gemm.h>
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace gemm {
+namespace kernel {
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+/*
+ * This configuration is used for float inputs with veclen (kAlignmentA/B) = 2 or 4.
+ * The ideal threadblock tile shape for such cases is 32x256x16, as it causes no
+ * register spills.
+ *
+ */
+template <
+  /// Element type of the A matrix operand
+  typename ElementA_,
+  /// Access alignment (veclen, in elements) of the A matrix operand
+  int kAlignmentA,
+  /// Element type of the B matrix operand
+  typename ElementB_,
+  /// Access alignment (veclen, in elements) of the B matrix operand
+  int kAlignmentB,
+  /// Element type of the C and D matrix operands
+  typename ElementC_,
+  /// Element type used for internal accumulation
+  typename ElementAccumulator,
+  /// Epilogue output operator - must satisfy the 'EpilogueWithBroadcastOp' concept
+  typename EpilogueOutputOp,
+  /// Number of stages in the pipelined mainloop
+  int Stages,
+  /// true for row-major inputs, false for column-major inputs
+  bool isRowMajor>
+struct FusedDistanceNNGemm {
+  // Primary template, set up for fp32/3xTF32
+
+  /// Threadblock-level tile size (concept: GemmShape)
+  // <- threadblock tile M = 32, N = 256, K = 16
+  // The more performant of the two candidate shapes, but note that it
+  // spills registers when veclen = 1.
+  using ThreadblockShape = cutlass::gemm::GemmShape<32, 256, 16>;
+
+  // <- threadblock tile M = 32, N = 128, K = 16
+  // Alternative shape: high occupancy and *no* register spills for
+  // any veclen (1, 2, 4), but less performant.
+  // (kept for reference, not enabled)
+  // using ThreadblockShape = cutlass::gemm::GemmShape<32, 128, 16>;
+
+  /// Warp-level tile size (concept: GemmShape)
+  // Tile computed by a single warp
+  // <- warp tile M = 32, N = 64, K = 16
+  // The more performant choice for veclen 2 and 4.
+  using WarpShape = cutlass::gemm::GemmShape<32, 64, 16>;
+
+  // High-occupancy but less performant warp tile, used with the 32x128x16 threadblock tile
+  // using WarpShape = cutlass::gemm::GemmShape<32, 32, 16>;
+
+  /// Instruction-level tile size (concept: GemmShape)
+  // Size of a single MMA op
+  // <- MMA Op tile M = 16, N = 8, K = 4
+  using InstructionShape = cutlass::gemm::GemmShape<16, 8, 4>;
+
+  /// Operation performed by GEMM
+  using Operator = cutlass::arch::OpMultiplyAddFastF32;
+  // using Operator = cutlass::arch::OpMultiplyAdd; // this runs only 1xTF32 for float inputs
+
+  // Selects tensor cores (rather than regular SIMT cores) as the
+  // execution units on the GPU SM
+  using OperatorClass = cutlass::arch::OpClassTensorOp;
+
+  // Target CUDA SM architecture
+  using ArchTag = cutlass::arch::Sm80;
+
+  // How threadblocks are scheduled on the GPU
+  /// Threadblock-level swizzling operator
+  using ThreadblockSwizzle = cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>;
+
+  /// Data layout of the final output matrix;
+  // it stays row-major even when the inputs are column-major
+  using LayoutOutput = cutlass::layout::RowMajor;
+
+  using NormXLayout = typename std::conditional<isRowMajor,
+                                                cutlass::layout::RowMajor,
+                                                cutlass::layout::ColumnMajor>::type;
+
+  using LayoutA_ = typename std::
+    conditional<isRowMajor, cutlass::layout::RowMajor, cutlass::layout::ColumnMajor>::type;
+
+  using LayoutB_ = typename std::
+    conditional<isRowMajor, cutlass::layout::ColumnMajor, cutlass::layout::RowMajor>::type;
+
+  using GemmBase = typename DefaultGemmUniversal<ElementA_,
+                                                 LayoutA_,
+                                                 cutlass::ComplexTransform::kNone,
+                                                 kAlignmentA,
+                                                 ElementB_,
+                                                 LayoutB_,
+                                                 cutlass::ComplexTransform::kNone,
+                                                 kAlignmentB,
+                                                 ElementC_,
+                                                 LayoutOutput,
+                                                 ElementAccumulator,
+                                                 OperatorClass,
+                                                 ArchTag,
+                                                 ThreadblockShape,
+                                                 WarpShape,
+                                                 InstructionShape,
+                                                 EpilogueOutputOp,
+                                                 ThreadblockSwizzle,
+                                                 Stages,
+                                                 Operator>::GemmKernel;
+
+  // Swap the default epilogue for the fused distance/NN epilogue
+  using Epilogue = typename cutlass::epilogue::threadblock::FusedDistanceNNEpilogue<
+    typename GemmBase::Epilogue::Shape,
+    typename GemmBase::Epilogue::WarpMmaOperator,
+    GemmBase::Epilogue::kPartitionsK,
+    ElementAccumulator,
+    typename EpilogueOutputOp::ElementT,
+    ElementAccumulator,
+    EpilogueOutputOp,
+    NormXLayout,
+    GemmBase::Epilogue::kElementsPerAccess>::Epilogue;
+
+  // Final kernel: the base mainloop combined with the replaced epilogue
+  using GemmKernel = FusedDistanceNNPersistent<typename GemmBase::Mma,
+                                               Epilogue,
+                                               ThreadblockSwizzle,
+                                               GroupScheduleMode::kDeviceOnly>;
+};
+
+/*
+ * This configuration is used for float inputs with veclen (kAlignmentA/B) = 1.
+ * The ideal threadblock tile shape for such cases is 32x128x16, as it causes no
+ * register spills.
+ *
+ */
+template <
+  /// Element type of the C and D matrix operands
+  typename ElementC_,
+  /// Element type used for internal accumulation
+  typename ElementAccumulator,
+  /// Epilogue output operator - must satisfy the 'EpilogueWithBroadcastOp' concept
+  typename EpilogueOutputOp,
+  /// Number of stages in the pipelined mainloop
+  int Stages,
+  /// true for row-major inputs, false for column-major inputs
+  bool isRowMajor>
+struct FusedDistanceNNGemm<float,  /// Element type of the A matrix operand
+                           1,      /// Alignment (veclen) of the A matrix operand
+                           float,  /// Element type of the B matrix operand
+                           1,      /// Alignment (veclen) of the B matrix operand
+                           ElementC_,
+                           ElementAccumulator,
+                           EpilogueOutputOp,
+                           Stages,
+                           isRowMajor> {
+  // Partial specialization for fp32/3xTF32 with veclen = 1
+  using ElementA_ = float;
+  using ElementB_ = float;
+
+  /// Threadblock-level tile size (concept: GemmShape)
+  // <- threadblock tile M = 32, N = 128, K = 16
+  // High occupancy and no register spills when veclen = 1.
+  using ThreadblockShape = cutlass::gemm::GemmShape<32, 128, 16>;
+
+  /// Warp-level tile size (concept: GemmShape)
+  // Tile computed by a single warp
+  // <- warp tile M = 32, N = 32, K = 16
+  using WarpShape = cutlass::gemm::GemmShape<32, 32, 16>;
+
+  /// Instruction-level tile size (concept: GemmShape)
+  // Size of a single MMA op
+  // <- MMA Op tile M = 16, N = 8, K = 4
+  using InstructionShape = cutlass::gemm::GemmShape<16, 8, 4>;
+
+  /// Operation performed by GEMM
+  using Operator = cutlass::arch::OpMultiplyAddFastF32;
+  // using Operator = cutlass::arch::OpMultiplyAdd; // this runs only 1xTF32 for float inputs
+
+  // Selects tensor cores (rather than regular SIMT cores) as the
+  // execution units on the GPU SM
+  using OperatorClass = cutlass::arch::OpClassTensorOp;
+
+  // Target CUDA SM architecture
+  using ArchTag = cutlass::arch::Sm80;
+
+  // How threadblocks are scheduled on the GPU
+  /// Threadblock-level swizzling operator
+  using ThreadblockSwizzle = cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>;
+
+  /// Data layout of the final output matrix;
+  // it stays row-major even when the inputs are column-major
+  using LayoutOutput = cutlass::layout::RowMajor;
+
+  using NormXLayout = typename std::conditional<isRowMajor,
+                                                cutlass::layout::RowMajor,
+                                                cutlass::layout::ColumnMajor>::type;
+
+  using LayoutA_ = typename std::
+    conditional<isRowMajor, cutlass::layout::RowMajor, cutlass::layout::ColumnMajor>::type;
+
+  using LayoutB_ = typename std::
+    conditional<isRowMajor, cutlass::layout::ColumnMajor, cutlass::layout::RowMajor>::type;
+
+  using GemmBase = typename DefaultGemmUniversal<ElementA_,
+                                                 LayoutA_,
+                                                 cutlass::ComplexTransform::kNone,
+                                                 1,
+                                                 ElementB_,
+                                                 LayoutB_,
+                                                 cutlass::ComplexTransform::kNone,
+                                                 1,
+                                                 ElementC_,
+                                                 LayoutOutput,
+                                                 ElementAccumulator,
+                                                 OperatorClass,
+                                                 ArchTag,
+                                                 ThreadblockShape,
+                                                 WarpShape,
+                                                 InstructionShape,
+                                                 EpilogueOutputOp,
+                                                 ThreadblockSwizzle,
+                                                 Stages,
+                                                 Operator>::GemmKernel;
+
+  // Swap the default epilogue for the fused distance/NN epilogue
+  using Epilogue = typename cutlass::epilogue::threadblock::FusedDistanceNNEpilogue<
+    typename GemmBase::Epilogue::Shape,
+    typename GemmBase::Epilogue::WarpMmaOperator,
+    GemmBase::Epilogue::kPartitionsK,
+    ElementAccumulator,
+    typename EpilogueOutputOp::ElementT,
+    ElementAccumulator,
+    EpilogueOutputOp,
+    NormXLayout,
+    GemmBase::Epilogue::kElementsPerAccess>::Epilogue;
+
+  // Final kernel: the base mainloop combined with the replaced epilogue
+  using GemmKernel = FusedDistanceNNPersistent<typename GemmBase::Mma,
+                                               Epilogue,
+                                               ThreadblockSwizzle,
+                                               GroupScheduleMode::kDeviceOnly>;
+};
+
+template <
+  /// Access alignment of the A operand (accepted, but GemmBase below always uses 1)
+  int kAlignmentA,
+  /// Access alignment of the B operand (accepted, but GemmBase below always uses 1)
+  int kAlignmentB,
+  /// Element type of the C and D matrix operands
+  typename ElementC_,
+  /// Element type used for internal accumulation
+  typename ElementAccumulator,
+  /// Epilogue output operator - must satisfy the 'EpilogueWithBroadcastOp' concept
+  typename EpilogueOutputOp,
+  /// Number of stages in the pipelined mainloop
+  int Stages,
+  /// true for row-major inputs, false for column-major inputs
+  bool isRowMajor>
+struct FusedDistanceNNGemm<double,
+                           kAlignmentA,
+                           double,
+                           kAlignmentB,
+                           ElementC_,
+                           ElementAccumulator,
+                           EpilogueOutputOp,
+                           Stages,
+                           isRowMajor> {
+  // Threadblock-level tile size (concept: GemmShape)
+  // <- threadblock tile M = 64, N = 64, K = 16
+  using ThreadblockShape = cutlass::gemm::GemmShape<64, 64, 16>;
+  // using ThreadblockShape = cutlass::gemm::GemmShape<16, 128, 16>;
+  /// Warp-level tile size (concept: GemmShape)
+  // Tile computed by a single warp
+  // <- warp tile M = 32, N = 32, K = 16
+  using WarpShape = cutlass::gemm::GemmShape<32, 32, 16>;
+  // using WarpShape = cutlass::gemm::GemmShape<16, 32, 16>;
+  /// Instruction-level tile size (concept: GemmShape)
+  // Size of a single MMA op
+  using InstructionShape = cutlass::gemm::GemmShape<8, 8, 4>;
+
+  // Operation performed by GEMM
+  using Operator = cutlass::arch::OpMultiplyAdd;
+  // Selects tensor cores (rather than regular SIMT cores) as the
+  // execution units on the GPU SM
+  using OperatorClass = cutlass::arch::OpClassTensorOp;
+
+  // Target CUDA SM architecture
+  using ArchTag = cutlass::arch::Sm80;
+
+  // How threadblocks are scheduled on the GPU
+  /// Threadblock-level swizzling operator
+  using ThreadblockSwizzle = cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>;
+
+  /// Data layout of the final output matrix;
+  // it stays row-major even when the inputs are column-major
+  using LayoutOutput = cutlass::layout::RowMajor;
+
+  using NormXLayout = typename std::conditional<isRowMajor,
+                                                cutlass::layout::RowMajor,
+                                                cutlass::layout::ColumnMajor>::type;
+
+  using LayoutA_ = typename std::
+    conditional<isRowMajor, cutlass::layout::RowMajor, cutlass::layout::ColumnMajor>::type;
+
+  using LayoutB_ = typename std::
+    conditional<isRowMajor, cutlass::layout::ColumnMajor, cutlass::layout::RowMajor>::type;
+
+  using GemmBase = typename DefaultGemmUniversal<double,
+                                                 LayoutA_,
+                                                 cutlass::ComplexTransform::kNone,
+                                                 1,  // fixed; kAlignmentA is not forwarded
+                                                 double,
+                                                 LayoutB_,
+                                                 cutlass::ComplexTransform::kNone,
+                                                 1,  // fixed; kAlignmentB is not forwarded
+                                                 ElementC_,
+                                                 LayoutOutput,
+                                                 ElementAccumulator,
+                                                 OperatorClass,
+                                                 ArchTag,
+                                                 ThreadblockShape,
+                                                 WarpShape,
+                                                 InstructionShape,
+                                                 EpilogueOutputOp,
+                                                 ThreadblockSwizzle,
+                                                 Stages,
+                                                 Operator>::GemmKernel;
+
+  // Swap the default epilogue for the fused distance/NN epilogue
+  using Epilogue = typename cutlass::epilogue::threadblock::FusedDistanceNNEpilogue<
+    typename GemmBase::Epilogue::Shape,
+    typename GemmBase::Epilogue::WarpMmaOperator,
+    GemmBase::Epilogue::kPartitionsK,
+    ElementC_,
+    typename EpilogueOutputOp::ElementT,
+    ElementC_,
+    EpilogueOutputOp,
+    NormXLayout,
+    GemmBase::Epilogue::kElementsPerAccess>::Epilogue;
+
+  // Final kernel: the base mainloop combined with the replaced epilogue
+  using GemmKernel = FusedDistanceNNPersistent<typename GemmBase::Mma,
+                                               Epilogue,
+                                               ThreadblockSwizzle,
+                                               GroupScheduleMode::kDeviceOnly>;
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace kernel
+}  // namespace gemm
+}  // namespace cutlass
\ No newline at end of file
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/persistent_gemm.h b/cpp/include/raft/distance/detail/fused_distance_nn/persistent_gemm.h
new file mode 100644
index 0000000000..3a8d6c8655
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/persistent_gemm.h
@@ -0,0 +1,515 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*! \file
+    \brief Problem visitor for grouped GEMMs
+This file contains a heavily customized version of GemmGrouped from CUTLASS 2.10.0
+(https://github.com/NVIDIA/cutlass/blob/v2.10.0/include/cutlass/gemm/kernel/gemm_grouped.h)
+
+Changes:
+- supports only a single problem size, which is launched persistently so that
+  each threadblock processes more than one tile of the same problem.
+*/
+
+#pragma once
+
+#include <cutlass/complex.h>
+#include <cutlass/cutlass.h>
+#include <cutlass/fast_math.h>
+#include <cutlass/gemm/gemm.h>
+#include <cutlass/matrix_coord.h>
+#include <cutlass/semaphore.h>
+
+#include <cutlass/gemm/kernel/gemm_grouped_problem_visitor.h>
+#include <cutlass/gemm/kernel/gemm_transpose_operands.h>
+#include <cutlass/layout/matrix.h>
+#include <cutlass/trace.h>
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+namespace gemm {
+namespace kernel {
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename Mma_,                         ///! Threadblock-scoped matrix multiply-accumulate
+          typename Epilogue_,                    ///! Epilogue
+          typename ThreadblockSwizzle_,          ///! Threadblock swizzling function
+          GroupScheduleMode GroupScheduleMode_,  ///! Type of scheduling to perform
+          bool Transposed = false>
+struct FusedDistanceNNPersistent {
+ public:
+  using Mma                                         = Mma_;
+  using Epilogue                                    = Epilogue_;
+  using EpilogueOutputOp                            = typename Epilogue::OutputOp;
+  using ThreadblockSwizzle                          = ThreadblockSwizzle_;
+  static GroupScheduleMode const kGroupScheduleMode = GroupScheduleMode_;
+  static bool const kTransposed                     = Transposed;
+
+  // Optional transpose
+  using MapArguments = kernel::detail::MapArguments<typename Mma::IteratorA::Element,
+                                                    typename Mma::IteratorA::Layout,
+                                                    Mma::kTransformA,
+                                                    Mma::IteratorA::AccessType::kElements,
+                                                    typename Mma::IteratorB::Element,
+                                                    typename Mma::IteratorB::Layout,
+                                                    Mma::kTransformB,
+                                                    Mma::IteratorB::AccessType::kElements,
+                                                    typename Mma::LayoutC,
+                                                    kTransposed>;
+
+  // Public-facing type definitions related to operand element type, layout, and complex conjugate
+  // operation. Must interact with the 'kTransposed' notion.
+  using ElementA = typename MapArguments::ElementA;
+  using LayoutA  = typename MapArguments::LayoutA;
+  using ElementB = typename MapArguments::ElementB;
+  using LayoutB  = typename MapArguments::LayoutB;
+  using ElementC = typename Epilogue::OutputTileIterator::Element;
+  using LayoutC  = typename MapArguments::LayoutC;
+
+  static ComplexTransform const kTransformA = MapArguments::kTransformA;
+  static ComplexTransform const kTransformB = MapArguments::kTransformB;
+
+  // Type definitions about the mainloop.
+  using Operator         = typename Mma::Operator;
+  using OperatorClass    = typename Mma::Operator::OperatorClass;
+  using ThreadblockShape = typename Mma::Shape;
+  using WarpShape        = typename Mma::Operator::Shape;
+  using InstructionShape = typename Mma::Policy::Operator::InstructionShape;
+  using ArchTag          = typename Mma::ArchTag;
+
+  static int const kStages     = Mma::kStages;
+  static int const kAlignmentA = MapArguments::kAlignmentA;
+  static int const kAlignmentB = MapArguments::kAlignmentB;
+  static int const kAlignmentC = Epilogue::OutputTileIterator::kElementsPerAccess;
+
+  /// Warp count (concept: GemmShape)
+  using WarpCount               = typename Mma::WarpCount;
+  static int const kThreadCount = 32 * WarpCount::kCount;
+
+  using ProblemVisitor = GemmGroupedProblemVisitor<ThreadblockShape,
+                                                   kGroupScheduleMode,
+                                                   kThreadCount,
+                                                   kThreadCount,
+                                                   kTransposed>;
+
+  //
+  // Structures
+  //
+
+  /// Small holder for a problem count; the count defaults to 0
+  struct temp_problem_visitor {
+    int problem_count;
+    CUTLASS_HOST_DEVICE temp_problem_visitor() : problem_count(0) {}
+    CUTLASS_HOST_DEVICE temp_problem_visitor(int count) : problem_count(count) {}
+  };
+
+  /// Argument structure: one problem size plus operand pointers and their strides
+  struct Arguments {
+    //
+    // Data members
+    //
+    GemmCoord problem_sizes;
+    temp_problem_visitor problem_visitor;
+    int problem_count;
+    int threadblock_count;
+
+    typename EpilogueOutputOp::Params output_op;
+
+    void const* ptr_A;
+    void const* ptr_B;
+    void const* ptr_C;
+    void* ptr_Vector;
+    void* ptr_Tensor;
+
+    typename LayoutA::Stride::Index lda;
+    typename LayoutB::Stride::Index ldb;
+    typename LayoutC::Stride::Index ldc;
+    typename LayoutC::Stride::Index ldt;
+
+    GemmCoord* host_problem_sizes;  // Only used by device-level operator
+
+    //
+    // Methods
+    //
+
+    /// Default ctor: zero counts, null pointers and zero strides
+    CUTLASS_HOST_DEVICE
+    Arguments()
+      : problem_count(0),
+        threadblock_count(0),
+        ptr_A(nullptr),
+        ptr_B(nullptr),
+        ptr_C(nullptr),
+        ptr_Vector(nullptr),
+        ptr_Tensor(nullptr),
+        lda(0),
+        ldb(0),
+        ldc(0),
+        ldt(0),
+        host_problem_sizes(nullptr)
+    {
+    }
+
+    /// Ctor; problem_count is stored both in the member and in problem_visitor
+    CUTLASS_HOST_DEVICE
+    Arguments(GemmCoord problem_sizes,
+              int problem_count,
+              int threadblock_count,
+              typename EpilogueOutputOp::Params output_op,
+              void const* ptr_A,
+              void const* ptr_B,
+              void const* ptr_C,
+              void* ptr_Vector,
+              void* ptr_Tensor,
+              typename LayoutA::Stride::Index lda,
+              typename LayoutB::Stride::Index ldb,
+              typename LayoutC::Stride::Index ldc,
+              typename LayoutC::Stride::Index ldt,
+              GemmCoord* host_problem_sizes = nullptr)
+      : problem_sizes(problem_sizes),
+        problem_visitor(problem_count),
+        problem_count(problem_count),
+        threadblock_count(threadblock_count),
+        output_op(output_op),
+        ptr_A(ptr_A),
+        ptr_B(ptr_B),
+        ptr_C(ptr_C),
+        ptr_Vector(ptr_Vector),
+        ptr_Tensor(ptr_Tensor),
+        lda(lda),
+        ldb(ldb),
+        ldc(ldc),
+        ldt(ldt),
+        host_problem_sizes(host_problem_sizes)
+    {
+    }
+  };
+
+  //
+  // Structure for precomputing values in host memory and passing to kernels
+  //
+
+  /// Parameters structure, constructed from Arguments and consumed by operator()
+  struct Params {
+    temp_problem_visitor problem_visitor;
+    int threadblock_count;
+
+    typename Mma::IteratorA::Params params_A;
+    typename Mma::IteratorB::Params params_B;
+    typename Epilogue::OutputTileIterator::Params params_C;
+    typename Epilogue::TensorTileIterator::Params params_Tensor;
+
+    typename EpilogueOutputOp::Params output_op;
+
+    void* ptr_A;
+    void* ptr_B;
+    void* ptr_C;
+    void* ptr_Vector;
+    void* ptr_Tensor;
+
+    GemmCoord problem_size;
+    typename LayoutA::Stride::Index lda;
+    typename LayoutB::Stride::Index ldb;
+    typename LayoutC::Stride::Index ldc;
+    typename LayoutC::Stride::Index ldt;
+
+    //
+    // Methods
+    //
+
+    CUTLASS_HOST_DEVICE
+    Params()
+      : threadblock_count(0),
+        params_A(0),
+        params_B(0),
+        params_C(0),
+        ptr_A(nullptr),
+        ptr_B(nullptr),
+        ptr_C(nullptr),
+        ptr_Vector(nullptr),
+        ptr_Tensor(nullptr),
+        lda(0),
+        ldb(0),
+        ldc(0),
+        ldt(0)
+    {
+    }
+
+    CUTLASS_HOST_DEVICE
+    Params(Arguments const& args, void* workspace = nullptr, int tile_count = 0)
+      : problem_visitor(args.problem_visitor.problem_count),
+        threadblock_count(args.threadblock_count),
+        params_A(args.lda),
+        params_B(args.ldb),
+        params_C(args.ldc),
+        // Here we pass additional user args via args.output_op
+        // to the reduction output tile iterator
+        params_Tensor(args.ldt, args.output_op),
+        output_op(args.output_op),
+        ptr_A(const_cast<void*>(args.ptr_A)),
+        ptr_B(const_cast<void*>(args.ptr_B)),
+        ptr_C(const_cast<void*>(args.ptr_C)),
+        ptr_Vector(args.ptr_Vector),
+        ptr_Tensor(args.ptr_Tensor),
+        problem_size(args.problem_sizes),
+        lda(args.lda),
+        ldb(args.ldb),
+        ldc(args.ldc),
+        ldt(args.ldt)
+    {
+    }
+
+    CUTLASS_HOST_DEVICE
+    void update(Arguments const& args, void* workspace = nullptr, int tile_count = 0)
+    {
+      threadblock_count = args.threadblock_count;
+      output_op         = args.output_op;
+      ptr_A             = const_cast<void*>(args.ptr_A);
+      ptr_B             = const_cast<void*>(args.ptr_B);
+      ptr_C             = const_cast<void*>(args.ptr_C);
+      ptr_Vector        = args.ptr_Vector;
+      ptr_Tensor        = args.ptr_Tensor;
+      lda               = args.lda;
+      ldb               = args.ldb;
+      ldc               = args.ldc;
+      ldt               = args.ldt;
+      // NOTE(review): params_A/B/C/Tensor and problem_visitor are not refreshed here.
+      problem_size = args.problem_sizes;
+    }
+  };
+
+  /// Shared memory storage structure
+  struct SharedStorage {
+    union {  // main_loop and epilogue are union members, so their storage overlaps
+      typename Mma::SharedStorage main_loop;
+      typename Epilogue::SharedStorage epilogue;
+    } kernel;
+    // The two members below sit outside the union, separate from the storage above.
+    typename Epilogue::TensorTileIterator::SharedStorage reduced_store;
+    typename Epilogue::OutputTileIterator::SharedStorage rownorm_store;
+  };
+
+ public:
+  //
+  // Methods
+  //
+
+  CUTLASS_DEVICE
+  FusedDistanceNNPersistent() {}  // empty: nothing is initialized here
+
+  /// Both overloads always report success; no alignment or size check is performed
+  static Status can_implement(cutlass::gemm::GemmCoord const& problem_size)
+  {
+    return Status::kSuccess;
+  }
+
+  static Status can_implement(Arguments const& args) { return Status::kSuccess; }
+
+  static size_t get_extra_workspace_size(Arguments const& args,
+                                         cutlass::gemm::GemmCoord const& grid_tiled_shape)
+  {
+    return 0;
+  }
+
+  CUTLASS_DEVICE
+  static uint32_t tile_count(const cutlass::MatrixCoord& grid)
+  {
+    return grid.row() * grid.column();
+  }
+
+  /// Tile grid (rows, cols) = (ceil(m / ThreadblockShape::kM), ceil(n / ThreadblockShape::kN))
+  CUTLASS_DEVICE
+  static cutlass::MatrixCoord grid_shape(const cutlass::gemm::GemmCoord& problem)
+  {
+    return cutlass::MatrixCoord(((problem.m() - 1 + ThreadblockShape::kM) / ThreadblockShape::kM),
+                                ((problem.n() - 1 + ThreadblockShape::kN) / ThreadblockShape::kN));
+  }
+
+  /// Persistent kernel body: each threadblock processes a contiguous chunk of output tiles.
+  CUTLASS_DEVICE
+  void operator()(Params const& params, SharedStorage& shared_storage)
+  {
+#if __CUDA_ARCH__ >= 800
+    //
+    // These types shadow the type-level definitions and support the ability to implement
+    // a 'transposed' GEMM that computes the transposed problems.
+    //
+    using ElementA = typename Mma::IteratorA::Element;
+    using LayoutA  = typename Mma::IteratorA::Layout;
+    using ElementB = typename Mma::IteratorB::Element;
+    using LayoutB  = typename Mma::IteratorB::Layout;
+    using ElementC = typename Epilogue::OutputTileIterator::Element;
+    using LayoutC  = typename Epilogue::OutputTileIterator::Layout;
+    // Row-major tile ids; this block owns [blockIdx.x * problem_chunk, problem_chunk_end).
+    const GemmCoord& problem_size    = params.problem_size;
+    const auto grid_shape_           = grid_shape(problem_size);
+    const uint32_t problem_chunk     = (tile_count(grid_shape_) - 1 + gridDim.x) / gridDim.x;
+    const uint32_t problem_chunk_end = blockIdx.x * problem_chunk + problem_chunk;
+    typename LayoutB::Index column =
+      ((blockIdx.x * problem_chunk) % grid_shape_.column()) * Mma::Shape::kN;
+
+    typename LayoutB::Index row =
+      ((blockIdx.x * problem_chunk) / grid_shape_.column()) * Mma::Shape::kM;
+    if (column) {  // this block's chunk starts mid-row
+      shared_storage.reduced_store.initSmem(params.output_op);
+      shared_storage.rownorm_store.initSmem(params.ptr_C, problem_size.m(), row, sizeof(ElementC));
+    }
+
+    // Outer 'persistent' loop to iterate over tiles
+    for (uint32_t tile_idx = blockIdx.x * problem_chunk; tile_idx < problem_chunk_end; tile_idx++) {
+      // grid_shape_ (computed once above) is loop-invariant and reused for every tile.
+      cutlass::MatrixCoord threadblock_offset(
+        int(tile_idx / grid_shape_.column()) * Mma::Shape::kM,
+        int(tile_idx % grid_shape_.column()) * Mma::Shape::kN);
+
+      const bool isNextTile = ((tile_idx + 1) < problem_chunk_end);
+      const bool doesRowChange =
+        ((threadblock_offset.column() + Mma::Shape::kN) >= problem_size.n());
+      const bool do_gmem_reduce = doesRowChange || !isNextTile;
+
+      ElementA* ptr_A = static_cast<ElementA*>(params.ptr_A);
+      ElementB* ptr_B = static_cast<ElementB*>(params.ptr_B);
+
+      // Compute initial location in logical coordinates
+      cutlass::MatrixCoord tb_offset_A{threadblock_offset.row(), 0};
+      cutlass::MatrixCoord tb_offset_B{0, threadblock_offset.column()};
+
+      // Compute position within threadblock
+      int thread_idx = threadIdx.x;
+
+      // Construct iterators to A and B operands
+      typename Mma::IteratorA iterator_A(
+        params.params_A, ptr_A, {problem_size.m(), problem_size.k()}, thread_idx, tb_offset_A);
+
+      typename Mma::IteratorB iterator_B(
+        params.params_B, ptr_B, {problem_size.k(), problem_size.n()}, thread_idx, tb_offset_B);
+
+      // Broadcast the warp_id computed by lane 0 to ensure dependent code
+      // is compiled as warp-uniform.
+      int warp_idx = __shfl_sync(0xffffffff, threadIdx.x / 32, 0);
+
+      int lane_idx = threadIdx.x % 32;
+
+      //
+      // Matrix multiply phase
+      //
+
+      // Construct thread-scoped matrix multiply
+      Mma mma(shared_storage.kernel.main_loop, thread_idx, warp_idx, lane_idx);
+
+      typename Mma::FragmentC accumulators;
+
+      accumulators.clear();
+      // Number of main-loop iterations along K (k divided by Mma::Shape::kK, rounded up)
+      int gemm_k_iterations = (problem_size.k() + Mma::Shape::kK - 1) / Mma::Shape::kK;
+
+      // Wait for all threads to finish their epilogue phases from the previous tile.
+      //__syncthreads();
+
+      // Compute threadblock-scoped matrix multiply-add
+      mma(gemm_k_iterations, accumulators, iterator_A, iterator_B, accumulators);
+
+      //
+      // Epilogue
+      //
+
+      EpilogueOutputOp output_op(params.output_op);
+
+      ElementC* ptr_C = static_cast<ElementC*>(params.ptr_C);
+      typename Epilogue::ElementTensor* ptr_Tensor =
+        static_cast<typename Epilogue::ElementTensor*>(params.ptr_Tensor);
+
+      // Define the reduction output pointer and move to the appropriate place
+      typename Epilogue::ElementVector* ptr_Vector =
+        static_cast<typename Epilogue::ElementVector*>(params.ptr_Vector);
+
+      // Tile iterator loading from source tensor.
+      typename Epilogue::OutputTileIterator iterator_rownorm(shared_storage.rownorm_store,
+                                                             params.params_C,
+                                                             ptr_C,
+                                                             problem_size.mn(),
+                                                             thread_idx,
+                                                             threadblock_offset);
+
+      // Additional tensor to load from
+      typename Epilogue::TensorTileIterator tensor_iterator(shared_storage.reduced_store,
+                                                            params.params_Tensor,
+                                                            // Only the final block outputs Tensor
+                                                            ptr_Tensor,
+                                                            problem_size.mn(),
+                                                            thread_idx,
+                                                            do_gmem_reduce,
+                                                            threadblock_offset);
+
+      Epilogue epilogue(shared_storage.kernel.epilogue, thread_idx, warp_idx, lane_idx);
+
+      // The vector operand is optional (ptr_Vector may be null).
+      // Move to appropriate location for this output tile
+      if (ptr_Vector) { ptr_Vector += threadblock_offset.column(); }
+
+      // Execute the epilogue operator to update the destination tensor.
+      epilogue(output_op,
+               ptr_Vector,
+               // iterator_D,
+               accumulators,
+               iterator_rownorm,
+               tensor_iterator,
+               problem_size.mn(),
+               threadblock_offset);
+    }
+#endif
+  }
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace kernel
+}  // namespace gemm
+}  // namespace cutlass
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem.h b/cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem.h
new file mode 100644
index 0000000000..c35a64f105
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem.h
@@ -0,0 +1,448 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*! \file
+  \brief Epilogue for threadblock scoped GEMMs using Tensor Ops.
+
+This file contains a customized version of PredicatedTileIterator from CUTLASS 2.9.0
+(https://github.com/NVIDIA/cutlass/blob/v2.9.0/include/cutlass/epilogue/threadblock/predicated_tile_iterator.h#L75)
+
+Changes:
+- added `Layout_` template param
+- Only the row index is used to load the data in load_with_byte_offset().
+  This way the same normalization data is used across all columns in a row.
+
+*/
+#pragma once
+
+#include <cutlass/arch/arch.h>
+#include <cutlass/arch/memory.h>
+#include <cutlass/array.h>
+#include <cutlass/cutlass.h>
+#include <cutlass/epilogue/threadblock/output_tile_thread_map.h>
+#include <cutlass/epilogue/threadblock/predicated_tile_iterator_params.h>
+#include <cutlass/layout/matrix.h>
+#include <cutlass/layout/tensor.h>
+#include <cutlass/matrix_shape.h>
+#include <cutlass/numeric_types.h>
+#include <cutlass/tensor_ref.h>
+#include <cutlass/transform/pitch_linear_thread_map.h>
+
+#include <raft/util/device_loads_stores.cuh>
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace epilogue {
+namespace threadblock {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Tile iterator used to load and store output tile from global memory in epilogue.
+///
+/// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator
+///
+template <typename ThreadMap_,        ///< Thread map (concept: OutputTileThreadMap)
+          typename Element_,          ///< Element data type
+          typename Layout_,
+          bool ScatterD     = false,  ///< Scatter D operand or not
+          bool UseCUDAStore = false>
+class PredicatedTileIteratorNormVecSmem {
+ public:
+  using ThreadMap = ThreadMap_;
+  using Shape     = typename ThreadMap::Shape;
+
+  using Element = Element_;
+
+  using Layout         = Layout_;
+  using TensorRef      = TensorRef<Element, Layout>;
+  using ConstTensorRef = typename TensorRef::ConstTensorRef;
+
+  using Index       = typename Layout::Index;
+  using LongIndex   = typename Layout::LongIndex;
+  using TensorCoord = MatrixCoord;
+
+  static int const kElementsPerAccess = ThreadMap::kElementsPerAccess;
+  static int const kThreads           = ThreadMap::kThreads;
+  static int const kIterations        = ThreadMap::Count::kTile;
+
+  static int const total_rows = ThreadMap::kWarpCount * ThreadMap::Iterations::kRow *
+                                ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster *
+                                ThreadMap::Count::kTile * ThreadMap::Delta::kRow;
+
+  static_assert(ThreadMap::Iterations::kRow > 0, "ThreadMap::Iterations::kRow must be > 0");
+  static_assert(ThreadMap::Iterations::kGroup > 0, "ThreadMap::Iterations::kGroup must be > 0");
+  static_assert(ThreadMap::Iterations::kCluster > 0, "ThreadMap::Iterations::kCluster must be > 0");
+  static_assert(ThreadMap::Iterations::kColumn > 0, "ThreadMap::Iterations::kColumn must be > 0");
+
+  using Fragment = Array<Element,
+                         ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup *
+                           ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess>;
+
+  /// Memory access size
+  using AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess>;
+
+  //
+  // Parameters struct
+  //
+
+  /// Iterator parameters; a thin wrapper over the non-template PredicatedTileIteratorParams.
+  struct Params : PredicatedTileIteratorParams {
+    using Base = PredicatedTileIteratorParams;
+
+    CUTLASS_HOST_DEVICE
+    Params() {}
+    // Stride argument: layout.stride(0) scaled by sizeof(AccessType) / kElementsPerAccess.
+    CUTLASS_HOST_DEVICE
+    Params(Layout const& layout)
+      : PredicatedTileIteratorParams(
+          layout.stride(0) * int(sizeof(AccessType)) / kElementsPerAccess,
+          make_OutputTileThreadMapDesc<ThreadMap>())
+    {
+    }
+    // Wraps already-built base parameters unchanged.
+    CUTLASS_HOST_DEVICE
+    Params(Base const& base) : Base(base) {}
+  };
+
+  /// Per-thread predicate mask: one flag per column iteration of the thread map.
+  struct Mask {
+    static int const kCount = ThreadMap::Iterations::kColumn;
+
+    /// Predicate state
+    bool predicates[kCount];
+
+    //
+    // Methods (a default-constructed mask has every predicate enabled)
+    //
+    CUTLASS_HOST_DEVICE
+    Mask() { enable(); }
+
+    /// Efficiently disables all accesses guarded by mask
+    CUTLASS_HOST_DEVICE void clear()
+    {
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < kCount; ++i) {
+        predicates[i] = false;
+      }
+    }
+
+    /// Efficiently enables all accesses guarded by mask (host-callable, like the constructor)
+    CUTLASS_HOST_DEVICE void enable()
+    {
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < kCount; ++i) {
+        predicates[i] = true;
+      }
+    }
+  };
+
+  /// Shared storage allocation needed by the predicated tile
+  /// iterator for storing rowNorm chunk.
+  struct SharedStorage {
+    //
+    // Type definitions
+    //
+    using Shape = MatrixShape<total_rows, 1>;
+
+    /// Shape of the shared memory allocation
+    using StorageShape = MatrixShape<Shape::kRow, Shape::kColumn>;
+
+    //
+    // Data members
+    //
+    // total_rows x 1 Elements: written by initSmem(), read by load_with_byte_offset().
+    //
+    AlignedBuffer<Element, StorageShape::kCount> storage;
+
+    CUTLASS_DEVICE
+    Element* data() { return storage.data(); }
+
+    SharedStorage() {}
+    /// Copies up to total_rows Elements, starting tb_row_offset * stride bytes past `pointer`.
+    CUTLASS_DEVICE
+    void initSmem(void* pointer,
+                  const Index& num_rows,
+                  const Index& tb_row_offset,
+                  const LongIndex& stride)
+    {
+      Element* shared_elem_arr = data();
+      uint8_t* first_tile_byte_pointer_ =
+        reinterpret_cast<uint8_t*>(pointer) + LongIndex(tb_row_offset) * LongIndex(stride);
+      const auto gmem_ptr = reinterpret_cast<Element*>(first_tile_byte_pointer_);
+      // NOTE(review): rows are addressed as gmem_ptr + row, i.e. assumes stride == sizeof(Element).
+      for (int row = threadIdx.x; row < total_rows; row += blockDim.x) {
+        bool guard = (tb_row_offset + row) < num_rows;  // predicate for the copy below
+        cutlass::arch::cp_async<sizeof(Element)>(shared_elem_arr + row, gmem_ptr + row, guard);
+      }
+      cutlass::arch::cp_async_wait<0>();  // single wait after all copies have been issued
+    }
+  };
+
+ private:
+  //
+  // Data members
+  //
+
+  /// Parameters structure containing reference and precomputed state.
+  PredicatedTileIteratorParams params_;
+
+  /// Byte-level pointer
+  uint8_t* byte_pointer_;
+
+  /// Array of boolean values to contain steady-state predicates
+  Mask mask_;
+
+  /// Extent of the matrix tile in rows
+  Index extent_row_;
+
+  /// Extent of the matrix tile in columns
+  Index extent_column_;
+
+  /// A thread's starting row position (assuming steady-state predicates have been computed)
+  Index thread_start_row_;
+
+  /// A thread's starting column
+  Index thread_start_column_;
+
+  /// Internal state counter
+  int state_[3];
+
+  /// Scatter indices
+  int const* indices_;
+
+  //
+  // Static asserts about internal strides
+  //
+
+  static_assert(sizeof(extent_row_) == 4, "Expected 32b extents");
+  static_assert(sizeof(thread_start_row_) == 4, "Expected 32b extents");
+  static_assert(sizeof(PredicatedTileIteratorParams::stride) == 8, "Expected 64b strides");
+
+ private:
+  //
+  // Methods
+  //
+
+ protected:
+  SharedStorage& shared_storage_;
+
+ public:
+  //
+  // Methods
+  //
+
+  /// Constructor; refills the shared row store when the tile starts at column 0.
+  CUTLASS_DEVICE
+  PredicatedTileIteratorNormVecSmem(SharedStorage& shared_storage,
+                                    PredicatedTileIteratorParams const& params,
+                                    Element* pointer,
+                                    TensorCoord extent,
+                                    int thread_idx,
+                                    TensorCoord& threadblock_offset,
+                                    int const* indices = nullptr)
+    : params_(params), byte_pointer_(nullptr), indices_(indices), shared_storage_(shared_storage)
+  {
+    TensorCoord thread_offset = ThreadMap::initial_offset(thread_idx) + threadblock_offset;
+
+    extent_row_    = extent.row();
+    extent_column_ = extent.column();
+
+    thread_start_row_    = thread_offset.row();
+    thread_start_column_ = thread_offset.column();
+
+    // Initialize predicates
+    CUTLASS_PRAGMA_UNROLL
+    for (int c = 0; c < ThreadMap::Iterations::kColumn; ++c) {
+      mask_.predicates[c] =
+        ((thread_offset.column() + ThreadMap::Delta::kColumn * c) < extent.column());
+    }
+
+    // Initialize internal state counter (done before the early return so it is never unset)
+    state_[0] = state_[1] = state_[2] = 0;
+
+    // Null pointer performs no accesses
+    if (!pointer) {
+      mask_.clear();
+      return;
+    }
+
+    if (ScatterD && !indices) { mask_.clear(); }
+
+    // Initialize pointer
+    byte_pointer_ = reinterpret_cast<uint8_t*>(pointer) +
+                    LongIndex(thread_offset.row()) * LongIndex(params_.stride);
+
+    if (ScatterD) {
+      byte_pointer_ = reinterpret_cast<uint8_t*>(pointer) +
+                      LongIndex(thread_offset.column()) * sizeof(AccessType) / kElementsPerAccess;
+    }
+
+    if (threadblock_offset.column() == 0) {
+      shared_storage_.initSmem(pointer, extent_row_, threadblock_offset.row(), params_.stride);
+    }
+  }
+
+  /// Adds a pointer offset in units of Element
+  CUTLASS_HOST_DEVICE
+  void add_pointer_offset(LongIndex pointer_offset)
+  {
+    byte_pointer_ += pointer_offset * sizeof_bits<Element>::value / 8;
+  }
+
+  /// Fills `frag` with per-row values read from the shared row store; `byte_offset` is unused.
+  CUTLASS_DEVICE
+  void load_with_byte_offset(Fragment& frag, int64_t byte_offset) const
+  {
+    AccessType* frag_ptr = reinterpret_cast<AccessType*>(&frag);
+
+    Element* shared_elem_arr = shared_storage_.data();
+
+    CUTLASS_PRAGMA_UNROLL
+    for (int cluster = 0; cluster < ThreadMap::Iterations::kCluster; ++cluster) {
+      CUTLASS_PRAGMA_UNROLL
+      for (int group = 0; group < ThreadMap::Iterations::kGroup; ++group) {
+        CUTLASS_PRAGMA_UNROLL
+        for (int row = 0; row < ThreadMap::Iterations::kRow; ++row) {
+          int frag_row_idx =
+            (row + ThreadMap::Iterations::kRow * (group + ThreadMap::Iterations::kGroup * cluster));
+          // Row position of this access, wrapped into the total_rows-entry shared store.
+          int row_offset = row * ThreadMap::Delta::kRow + group * ThreadMap::Delta::kGroup +
+                           cluster * ThreadMap::Delta::kCluster;
+          int iter_row = ((row_offset + thread_start_row_) % total_rows);
+          Element val  = shared_elem_arr[iter_row];
+          // NOTE(review): frag_row_idx + i overlaps rows when kElementsPerAccess > 1 - confirm.
+          CUTLASS_PRAGMA_UNROLL
+          for (int i = 0; i < kElementsPerAccess; ++i) {
+            (*frag_ptr)[frag_row_idx + i] = val;
+          }
+        }
+      }
+    }
+  }
+
+  /// Loads a fragment from memory
+  CUTLASS_DEVICE
+  void load(Fragment& frag) const { load_with_byte_offset(frag, 0); }
+
+  CUTLASS_DEVICE
+  MatrixCoord thread_start() const { return MatrixCoord(thread_start_row_, thread_start_column_); }
+
+  /// Need to get the thread start row from the tile iterator
+  CUTLASS_DEVICE
+  int32_t thread_start_row() const { return thread_start_row_; }
+
+  /// Need to get the thread start column from the tile iterator
+  CUTLASS_DEVICE
+  int32_t thread_start_column() const { return thread_start_column_; }
+
+  /// Extent of the matrix in rows
+  CUTLASS_DEVICE
+  Index extent_row() const { return extent_row_; }
+
+  /// Extent of the matrix in columns
+  CUTLASS_DEVICE
+  Index extent_column() const { return extent_column_; }
+
+  /// Advances the byte pointer and thread_start_row_ to the next position to load or store
+  CUTLASS_HOST_DEVICE
+  PredicatedTileIteratorNormVecSmem& operator++()
+  {
+    ++state_[0];
+
+    if (!ScatterD) { byte_pointer_ += params_.advance_row; }
+
+    thread_start_row_ += ThreadMap::Shape::kRow;
+    // state_[0..2] count row / group / cluster steps; each wraps at its ThreadMap::Count limit.
+    if (state_[0] == ThreadMap::Count::kRow) {
+      state_[0] = 0;
+      ++state_[1];
+      byte_pointer_ += params_.advance_group;
+
+      thread_start_row_ +=
+        (ThreadMap::Shape::kGroup - 1) * ThreadMap::Shape::kRow * ThreadMap::Count::kRow;
+
+      if (state_[1] == ThreadMap::Count::kGroup) {
+        state_[1] = 0;
+        ++state_[2];
+        byte_pointer_ += params_.advance_cluster;
+
+        thread_start_row_ += ThreadMap::Count::kGroup * ThreadMap::Shape::kGroup *
+                             ThreadMap::Count::kRow * ThreadMap::Shape::kRow;
+
+        if (state_[2] == ThreadMap::Count::kCluster) {
+          state_[2] = 0;
+          byte_pointer_ += params_.advance_tile;
+        }
+      }
+    }
+
+    return *this;
+  }
+
+  ///< Efficiently disables all accesses guarded by mask
+  CUTLASS_DEVICE void clear_mask() { mask_.clear(); }
+
+  ///< Efficiently enables all accesses guarded by mask
+  CUTLASS_DEVICE void enable_mask() { mask_.enable(); }
+
+  ///< Gets the mask (copies the current mask into `mask`)
+  CUTLASS_DEVICE void get_mask(Mask& mask) const { mask = mask_; }
+
+  ///< Sets the mask
+  CUTLASS_DEVICE void set_mask(Mask const& mask) { mask_ = mask; }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+}  // namespace threadblock
+}  // namespace epilogue
+}  // namespace cutlass
+
+////////////////////////////////////////////////////////////////////////////////
diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec.h b/cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec.h
new file mode 100644
index 0000000000..dc224c5c96
--- /dev/null
+++ b/cpp/include/raft/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec.h
@@ -0,0 +1,626 @@
+/***************************************************************************************************
+ * Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ **************************************************************************************************/
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*! \file
+  \brief Epilogue for threadblock scoped GEMMs using Tensor Ops.
+
+This file contains a customized version of PredicatedTileIterator from CUTLASS 2.9.0
+(https://github.com/NVIDIA/cutlass/blob/v2.9.0/include/cutlass/epilogue/threadblock/predicated_tile_iterator.h#L75)
+
+Changes:
+- added `Layout_` template param
+- PredicatedTileIteratorParams() is customized to not stride by layout.stride(0).
+- makes use of `SharedStorage` to store reduced values across warps to gmem in coalesced manner.
+- customized the store_with_byte_offset() to perform reduction per row and write final value to
+gmem.
+- customized the Params() struct to take user inputs from epilogueOp params.
+
+*/
+
+#pragma once
+
+#include <cooperative_groups.h>
+#include <cooperative_groups/reduce.h>
+#include <cutlass/arch/arch.h>
+#include <cutlass/arch/memory.h>
+#include <cutlass/array.h>
+#include <cutlass/cutlass.h>
+#include <cutlass/epilogue/threadblock/output_tile_thread_map.h>
+#include <cutlass/epilogue/threadblock/predicated_tile_iterator_params.h>
+#include <cutlass/layout/matrix.h>
+#include <cutlass/layout/tensor.h>
+#include <cutlass/matrix_shape.h>
+#include <cutlass/numeric_types.h>
+#include <cutlass/tensor_ref.h>
+#include <cutlass/transform/pitch_linear_thread_map.h>
+
+namespace cg = cooperative_groups;
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace cutlass {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace epilogue {
+namespace threadblock {
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Tile iterator used to load and store output tile from global memory in epilogue.
+///
+/// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator
+///
+template <typename ThreadMap_,  ///< Thread map (concept: OutputTileThreadMap)
+          typename Element_,    ///< Element data type
+          typename Layout_,
+          typename EpilogueOpParams_,
+          bool ScatterD     = false,  ///< Scatter D operand or not
+          bool UseCUDAStore = false>
+class PredicatedTileIteratorReducedVec {
+ public:
+  using ThreadMap = ThreadMap_;
+  using Shape     = typename ThreadMap::Shape;
+
+  using Element = Element_;
+
+  using Layout         = Layout_;
+  using TensorRef      = TensorRef<Element, Layout>;
+  using ConstTensorRef = typename TensorRef::ConstTensorRef;
+
+  using Index            = typename Layout::Index;
+  using LongIndex        = typename Layout::LongIndex;
+  using TensorCoord      = MatrixCoord;
+  using EpilogueOpParams = EpilogueOpParams_;
+  using OutIdxT          = typename EpilogueOpParams::CGReduceT::IndexT;
+  using OutValT          = typename EpilogueOpParams::CGReduceT::AccTypeT;
+
+  static int const kElementsPerAccess = ThreadMap::kElementsPerAccess;
+  static int const kThreads           = ThreadMap::kThreads;
+  static int const kIterations        = ThreadMap::Count::kTile;
+
+  static_assert(ThreadMap::Iterations::kRow > 0, "ThreadMap::Iterations::kRow must be > 0");
+  static_assert(ThreadMap::Iterations::kGroup > 0, "ThreadMap::Iterations::kGroup must be > 0");
+  static_assert(ThreadMap::Iterations::kCluster > 0, "ThreadMap::Iterations::kCluster must be > 0");
+  static_assert(ThreadMap::Iterations::kColumn > 0, "ThreadMap::Iterations::kColumn must be > 0");
+  static_assert(!UseCUDAStore, "UseCUDAStore path is not supported");
+
+  static int const total_rows = ThreadMap::kWarpCount * ThreadMap::Iterations::kRow *
+                                ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster *
+                                ThreadMap::Count::kTile * ThreadMap::Delta::kRow;
+  /// Fragment object
+  using Fragment =
+    Array<OutValT,
+          ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow *
+            ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * kElementsPerAccess>;
+
+  // Memory access size
+  using AccessType     = AlignedArray<Element, kElementsPerAccess>;
+  using AccessTypeValT = AlignedArray<OutValT, kElementsPerAccess>;
+
+  //
+  // Parameters struct
+  //
+
+  /// Iterator parameters: the non-template base plus the user-supplied epilogue-op params.
+  struct Params : PredicatedTileIteratorParams {
+    using Base = PredicatedTileIteratorParams;
+    // Copy of the epilogue-op parameters; only the two-argument constructor sets it.
+    EpilogueOpParams user_param;
+    CUTLASS_HOST_DEVICE
+    Params() {}
+    // Layout-strided variant; user_param is left default-constructed.
+    CUTLASS_HOST_DEVICE
+    Params(Layout const& layout)
+      : PredicatedTileIteratorParams(
+          layout.stride(0) * int(sizeof(AccessType)) / kElementsPerAccess,
+          make_OutputTileThreadMapDesc<ThreadMap>())
+    {
+    }
+    // `layout` is unused here: the stride is sizeof(AccessType) / kElementsPerAccess only.
+    CUTLASS_HOST_DEVICE
+    Params(Layout const& layout, EpilogueOpParams const& user_param_)
+      : PredicatedTileIteratorParams(int(sizeof(AccessType)) / kElementsPerAccess,
+                                     make_OutputTileThreadMapDesc<ThreadMap>()),
+        user_param(user_param_)
+    {
+    }
+    // Wraps existing base parameters; user_param is left default-constructed.
+    CUTLASS_HOST_DEVICE
+    Params(Base const& base) : Base(base) {}
+  };
+
+  /// Per-thread predicate mask used by this iterator.
+  struct Mask {
+    // One predicate per element (kColumn accesses x kElementsPerAccess), not one per access.
+    static int const kCount = ThreadMap::Iterations::kColumn * kElementsPerAccess;
+
+    /// Predicate state
+    bool predicates[kCount];
+
+    //
+    // Methods (a default-constructed mask has every predicate enabled)
+    //
+    CUTLASS_HOST_DEVICE
+    Mask() { enable(); }
+
+    /// Efficiently disables all accesses guarded by mask
+    CUTLASS_HOST_DEVICE void clear()
+    {
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < kCount; ++i) {
+        predicates[i] = false;
+      }
+    }
+
+    /// Efficiently enables all accesses guarded by mask (host-callable, like the constructor)
+    CUTLASS_HOST_DEVICE void enable()
+    {
+      CUTLASS_PRAGMA_UNROLL
+      for (int i = 0; i < kCount; ++i) {
+        predicates[i] = true;
+      }
+    }
+  };
+
+  /// Shared storage allocation needed by the predicated tile
+  /// iterator for reduction.
+  struct SharedStorage {
+    //
+    // Type definitions
+    //
+    using Shape = MatrixShape<total_rows, 1>;
+
+    /// Shape of the shared memory allocation for the reduced values store
+    using StorageShape = MatrixShape<Shape::kRow, Shape::kColumn>;
+
+    //
+    // Data members
+    //
+    // Backing buffer shaped total_rows x 1 Elements; initSmem() below resets every entry.
+    // NOTE(review): std::numeric_limits needs <limits>; presumably included transitively.
+    //
+    AlignedBuffer<Element, StorageShape::kCount> storage;
+
+    CUTLASS_DEVICE
+    Element* data() { return storage.data(); }
+
+    SharedStorage() {}
+    /// Each thread resets its share of rows via red_op_.init(slot, max value of OutValT).
+    CUTLASS_DEVICE
+    void initSmem(EpilogueOpParams const& user_params)
+    {
+      Element* shared_elem_arr = data();
+      constexpr auto maxVal    = std::numeric_limits<OutValT>::max();
+
+      for (int row = threadIdx.x; row < total_rows; row += blockDim.x) {
+        user_params.red_op_.init(&shared_elem_arr[row], maxVal);
+      }
+    }
+  };
+
+  template <typename cg_reduce_op_t,
+            typename cg_group_t,
+            typename IdxT,
+            typename ValT,
+            typename OutT>
+  struct select_reduce {
+    /// Performs warp level reduction and stores a reduced output to memory
+    CUTLASS_DEVICE
+    select_reduce(OutT value,
+                  ValT prev_red_val,
+                  cg_reduce_op_t reduce_op,
+                  cg_group_t cg_warp_group,
+                  OutT& shmem_ptr)
+    {
+      if (cg_warp_group.any(reduce_op.isAmin(value, prev_red_val))) {
+        OutT reduced_val = cg::reduce(cg_warp_group, value, reduce_op);
+        if (cg_warp_group.thread_rank() == 0) { shmem_ptr = reduced_val; }
+      }
+    }
+  };
+
+  template <typename cg_reduce_op_t, typename cg_group_t, typename IdxT>
+  struct select_reduce<cg_reduce_op_t, cg_group_t, IdxT, float, raft::KeyValuePair<IdxT, float>> {
+    using ValT = float;
+    using Ty   = raft::KeyValuePair<IdxT, ValT>;
+    /// Performs warp level reduction of key value pair and stores a reduced output to memory
+    CUTLASS_DEVICE
+    select_reduce(Ty val_to_red,
+                  float prev_red_val,
+                  cg_reduce_op_t cg_reduce_op,
+                  cg_group_t cg_warp_group,
+                  Ty& shmem_ptr)
+    {
+      ValT val = val_to_red.value;
+
+      if (cg_warp_group.any(cg_reduce_op.isAmin(val, prev_red_val))) {
+        ValT reduced_val = cg::reduce(cg_warp_group, val, cg_reduce_op);
+        bool pred        = (reduced_val == val);
+        auto subTile     = cg::binary_partition(cg_warp_group, pred);
+        if (pred) {
+          if (subTile.thread_rank() == 0) { shmem_ptr = val_to_red; }
+        }
+      }
+    }
+  };
+
+  template <typename cg_reduce_op_t, typename cg_group_t, typename IdxT>
+  struct select_reduce<cg_reduce_op_t, cg_group_t, IdxT, double, raft::KeyValuePair<IdxT, double>> {
+    using ValT = double;
+    using Ty   = raft::KeyValuePair<IdxT, ValT>;
+    /// Performs warp level reduction of key value pair and stores a reduced output to memory
+    CUTLASS_DEVICE
+    select_reduce(Ty val_to_red,
+                  double prev_red_val,
+                  cg_reduce_op_t cg_reduce_op,
+                  cg_group_t cg_warp_group,
+                  Ty& shmem_ptr)
+    {
+      ValT val = val_to_red.value;
+
+      if (cg_warp_group.any(cg_reduce_op.isAmin(val, prev_red_val))) {
+        ValT reduced_val = cg::reduce(cg_warp_group, val, cg_reduce_op);
+        bool pred        = (reduced_val == val);
+        auto subTile     = cg::binary_partition(cg_warp_group, pred);
+        if (pred) {
+          if (subTile.thread_rank() == 0) { shmem_ptr = val_to_red; }
+        }
+      }
+    }
+  };
+
+ private:
+  //
+  // Data members
+  //
+
+  /// Parameters structure containing reference and precomputed state.
+  Params params_;
+
+  /// Byte-level pointer
+  uint8_t* byte_pointer_;
+  /// Byte-level pointer first tile offset of this threadblock.
+  uint8_t* first_tile_byte_pointer_;
+
+  /// Array of boolean values to contain steady-state predicates
+  Mask mask_;
+
+  /// Extent of the matrix tile in rows
+  Index extent_row_;
+
+  /// Extent of the matrix tile in columns
+  Index extent_column_;
+
+  /// A thread's starting row position (assuming steady-state predicates have been computed)
+  Index thread_start_row_;
+  Index block_start_row_first_tile_;
+
+  /// A thread's starting column
+  Index thread_start_column_;
+
+  /// Internal state counter
+  int state_[3];
+  // mutable int shared_tile_id;
+
+  /// Scatter indices
+  int const* indices_;
+
+  //
+  // Static asserts about internal strides
+  //
+
+  static_assert(sizeof(extent_row_) == 4, "Expected 32b extents");
+  static_assert(sizeof(thread_start_row_) == 4, "Expected 32b extents");
+  static_assert(sizeof(Params::stride) == 8, "Expected 64b strides");
+
+ protected:
+  SharedStorage& shared_storage_;
+  const bool& do_gmem_reduction_;
+
+ private:
+  //
+  // Methods
+  //
+ public:
+  //
+  // Methods
+  //
+  /// Constructor
+  CUTLASS_DEVICE
+  PredicatedTileIteratorReducedVec(SharedStorage& shared_storage,
+                                   Params const& params,
+                                   Element* pointer,
+                                   TensorCoord extent,
+                                   int thread_idx,
+                                   const bool& do_gmem_reduction,
+                                   TensorCoord threadblock_offset = TensorCoord(),
+                                   int const* indices             = nullptr)
+    : params_(params),
+      indices_(indices),
+      shared_storage_(shared_storage),
+      do_gmem_reduction_(do_gmem_reduction)
+  {
+    TensorCoord thread_offset = ThreadMap::initial_offset(thread_idx) + threadblock_offset;
+
+    extent_row_    = extent.row();
+    extent_column_ = extent.column();
+
+    thread_start_row_    = thread_offset.row();
+    thread_start_column_ = thread_offset.column();
+
+    TensorCoord block_offset    = ThreadMap::initial_offset(0) + threadblock_offset;
+    block_start_row_first_tile_ = block_offset.row();
+
+    // Initialize predicates
+    CUTLASS_PRAGMA_UNROLL
+    for (int c = 0; c < ThreadMap::Iterations::kColumn * kElementsPerAccess; ++c) {
+      int columnPerAccess       = (c / kElementsPerAccess);
+      int columnWithinPerAccess = c % kElementsPerAccess;
+      mask_.predicates[c] = ((thread_offset.column() + ThreadMap::Delta::kColumn * columnPerAccess +
+                              columnWithinPerAccess) < extent.column());
+    }
+
+    if (threadblock_offset.column() == 0) {
+      EpilogueOpParams const& user_params = params_.user_param;
+      shared_storage_.initSmem(user_params);
+    }
+
+    // Null pointer performs no accesses
+    if (!pointer) { mask_.clear(); }
+
+    if (ScatterD && !indices) { mask_.clear(); }
+
+    // Initialize pointer
+    first_tile_byte_pointer_ = reinterpret_cast<uint8_t*>(pointer) +
+                               LongIndex(block_offset.row()) * LongIndex(params_.stride);
+
+    if (ScatterD) {
+      byte_pointer_ = reinterpret_cast<uint8_t*>(pointer) +
+                      LongIndex(thread_offset.column()) * sizeof(AccessType) / kElementsPerAccess;
+    }
+
+    // Initialize internal state counter
+    state_[0] = state_[1] = state_[2] = 0;
+  }
+
+  /// Destructor
+  CUTLASS_DEVICE
+  ~PredicatedTileIteratorReducedVec()
+  {
+    if (do_gmem_reduction_) {
+      EpilogueOpParams const& user_params = params_.user_param;
+      auto gmem_ptr                       = reinterpret_cast<Element*>(first_tile_byte_pointer_);
+      Element* shared_elem_arr            = shared_storage_.data();
+      const uint32_t mutex_id             = (block_start_row_first_tile_ / total_rows);
+      bool useGmemMutex = (gridDim.x != ((extent_row_ - 1 + total_rows) / total_rows));
+      // If this is not optimal grid size perform mutex based gmem reduce.
+      if (useGmemMutex) {
+        // single lock per block for multiple rows
+        if (threadIdx.x == 0 && block_start_row_first_tile_ < extent_row_) {
+          // acquire mutex lock.
+          unsigned int ns = 8;
+          while (atomicCAS(user_params.mutexes_ + mutex_id, 0, 1) == 1) {
+            __nanosleep(ns);
+            if (ns < 256) { ns *= 2; }
+          }
+        }
+      }
+
+      __syncthreads();
+      for (int row = threadIdx.x; row < total_rows; row += blockDim.x) {
+        if (block_start_row_first_tile_ + row < extent_row_) {
+          user_params.red_op_(
+            block_start_row_first_tile_ + row, &gmem_ptr[row], shared_elem_arr[row]);
+        }
+      }
+
+      if (useGmemMutex) {
+        __threadfence();
+        __syncthreads();
+        if (threadIdx.x == 0 && block_start_row_first_tile_ < extent_row_) {
+          // release mutex lock.
+          atomicExch(user_params.mutexes_ + mutex_id, 0);
+        }
+      }
+    }
+  }
+
+  /// Adds a pointer offset in units of Element
+  CUTLASS_HOST_DEVICE
+  void add_pointer_offset(LongIndex pointer_offset)
+  {
+    byte_pointer_ += pointer_offset * sizeof_bits<Element>::value / 8;
+  }
+
+  /// Performs reduction and Stores a reduced output to memory
+  CUTLASS_DEVICE
+  void store_with_byte_offset(Fragment& frag, int64_t byte_offset) const
+  {
+    AccessTypeValT* frag_ptr = reinterpret_cast<AccessTypeValT*>(&frag);
+
+    cg::thread_block cta = cg::this_thread_block();
+    // tile_width 16 is required if kElementsPerAccess > 1
+    constexpr int tile_width                 = (32 / ThreadMap::Delta::kColumn) ? 32 : 16;
+    cg::thread_block_tile<tile_width> tile32 = cg::tiled_partition<tile_width>(cta);
+    EpilogueOpParams const& user_params      = params_.user_param;
+
+    using cg_reduce_t = decltype(user_params.cg_reduce_op);
+    using tile32_t    = decltype(tile32);
+
+    Element* shared_elem_arr = shared_storage_.data();
+    constexpr auto maxVal    = std::numeric_limits<OutValT>::max();
+
+    CUTLASS_PRAGMA_UNROLL
+    for (int cluster = 0; cluster < ThreadMap::Iterations::kCluster; ++cluster) {
+      CUTLASS_PRAGMA_UNROLL
+      for (int group = 0; group < ThreadMap::Iterations::kGroup; ++group) {
+        CUTLASS_PRAGMA_UNROLL
+        for (int row = 0; row < ThreadMap::Iterations::kRow; ++row) {
+          int frag_row_idx =
+            (row + ThreadMap::Iterations::kRow * (group + ThreadMap::Iterations::kGroup * cluster));
+
+          int row_offset = row * ThreadMap::Delta::kRow + group * ThreadMap::Delta::kGroup +
+                           cluster * ThreadMap::Delta::kCluster;
+
+          const OutIdxT row_id = row_offset + thread_start_row_;
+          bool row_guard       = (row_id < extent_row_);
+
+          const int frag_idx = frag_row_idx * ThreadMap::Iterations::kColumn * kElementsPerAccess;
+          Element red_val;
+          user_params.red_op_.init(&red_val, maxVal);
+
+          if (row_guard) {
+            const int iter_row      = (row_id % total_rows);
+            const auto prev_red_val = user_params.red_op_.get_value(shared_elem_arr[iter_row]);
+
+            CUTLASS_PRAGMA_UNROLL
+            for (int column = 0; column < ThreadMap::Iterations::kColumn * kElementsPerAccess;
+                 ++column) {
+              int columnPerAccess     = column / kElementsPerAccess;
+              int columnWithPerAccess = column % kElementsPerAccess;
+              bool guard              = mask_.predicates[column];
+              if (guard) {
+                const OutIdxT key_id = thread_start_column_ +
+                                       ThreadMap::Delta::kColumn * columnPerAccess +
+                                       columnWithPerAccess;
+                const int frag_col_idx = frag_idx + column;
+
+                Element this_val;
+                user_params.red_op_.init(&this_val, (*frag_ptr)[frag_col_idx]);
+                user_params.red_op_.init_key(this_val, key_id);
+                user_params.red_op_(row_id, &red_val, this_val);
+              }
+            }
+            // select_reduce doesn't need to use `red_op_` as at the warp level we use cg_reduce_op,
+            // this satisfies the requirement of mst/single linkage of checking colors buffer.
+            select_reduce<cg_reduce_t, tile32_t, OutIdxT, OutValT, Element> red_obj(
+              red_val, prev_red_val, user_params.cg_reduce_op, tile32, shared_elem_arr[iter_row]);
+          }
+        }
+      }
+    }
+  }
+
+  /// Stores a fragment to memory
+  CUTLASS_DEVICE
+  void store(Fragment& frag) const { store_with_byte_offset(frag, 0); }
+
+  CUTLASS_DEVICE
+  MatrixCoord thread_start() const { return MatrixCoord(thread_start_row_, thread_start_column_); }
+
+  /// Need to get the thread start row from the tile iterator
+  CUTLASS_DEVICE
+  int32_t thread_start_row() const { return thread_start_row_; }
+
+  /// Need to get the thread start column from the tile iterator
+  CUTLASS_DEVICE
+  int32_t thread_start_column() const { return thread_start_column_; }
+
+  /// Extent of the matrix in rows
+  CUTLASS_DEVICE
+  Index extent_row() const { return extent_row_; }
+
+  /// Extent of the matrix in columns
+  CUTLASS_DEVICE
+  Index extent_column() const { return extent_column_; }
+
+  /// Advances to the next position to load or store
+  CUTLASS_HOST_DEVICE
+  PredicatedTileIteratorReducedVec& operator++()
+  {
+    ++state_[0];
+
+    if (!ScatterD) { byte_pointer_ += params_.advance_row; }
+
+    thread_start_row_ += ThreadMap::Shape::kRow;
+
+    if (state_[0] == ThreadMap::Count::kRow) {
+      state_[0] = 0;
+      ++state_[1];
+      byte_pointer_ += params_.advance_group;
+
+      thread_start_row_ +=
+        (ThreadMap::Shape::kGroup - 1) * ThreadMap::Shape::kRow * ThreadMap::Count::kRow;
+
+      if (state_[1] == ThreadMap::Count::kGroup) {
+        state_[1] = 0;
+        ++state_[2];
+        byte_pointer_ += params_.advance_cluster;
+
+        thread_start_row_ += ThreadMap::Count::kGroup * ThreadMap::Shape::kGroup *
+                             ThreadMap::Count::kRow * ThreadMap::Shape::kRow;
+
+        if (state_[2] == ThreadMap::Count::kCluster) {
+          state_[2] = 0;
+          byte_pointer_ += params_.advance_tile;
+        }
+      }
+    }
+
+    return *this;
+  }
+
+  ///< Efficiently disables all accesses guarded by mask
+  CUTLASS_DEVICE void clear_mask() { mask_.clear(); }
+
+  ///< Efficiently enables all accesses guarded by mask
+  CUTLASS_DEVICE void enable_mask() { mask_.enable(); }
+
+  ///< Gets the mask
+  CUTLASS_DEVICE void get_mask(Mask& mask) const { mask = mask_; }
+
+  ///< Sets the mask
+  CUTLASS_DEVICE void set_mask(Mask const& mask) { mask_ = mask; }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+}  // namespace threadblock
+}  // namespace epilogue
+}  // namespace cutlass
+
+////////////////////////////////////////////////////////////////////////////////
diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh
index be6fed9f10..2ff8fa7f1c 100644
--- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh
+++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh
@@ -21,8 +21,10 @@
 #include <raft/core/kvp.hpp>                                // raft::KeyValuePair
 #include <raft/core/operators.hpp>                          // raft::identity_op
 #include <raft/distance/detail/distance_ops/l2_exp.cuh>     // ops::l2_exp_distance_op
+#include <raft/distance/detail/fused_distance_nn/cutlass_base.cuh>
 #include <raft/distance/detail/pairwise_distance_base.cuh>  // PairwiseDistances
 #include <raft/linalg/contractions.cuh>                     // Policy
+#include <raft/util/arch.cuh>                               // raft::util::arch::SM_*
 #include <raft/util/cuda_utils.cuh>                         // raft::ceildiv, raft::shfl
 
 namespace raft {
@@ -41,7 +43,7 @@ struct KVPMinReduceImpl {
 template <typename LabelT, typename DataT>
 struct MinAndDistanceReduceOpImpl {
   typedef typename raft::KeyValuePair<LabelT, DataT> KVP;
-  DI void operator()(LabelT rid, KVP* out, const KVP& other)
+  DI void operator()(LabelT rid, KVP* out, const KVP& other) const
   {
     if (other.value < out->value) {
       out->key   = other.key;
@@ -49,17 +51,28 @@ struct MinAndDistanceReduceOpImpl {
     }
   }
 
-  DI void operator()(LabelT rid, DataT* out, const KVP& other)
+  DI void operator()(LabelT rid, DataT* out, const KVP& other) const
   {
     if (other.value < *out) { *out = other.value; }
   }
 
-  DI void init(DataT* out, DataT maxVal) { *out = maxVal; }
-  DI void init(KVP* out, DataT maxVal)
+  DI void operator()(LabelT rid, DataT* out, const DataT& other) const
   {
-    out->key   = 0;
-    out->value = maxVal;
+    if (other < *out) { *out = other; }
   }
+
+  DI void init(DataT* out, DataT maxVal) const { *out = maxVal; }
+  DI void init(KVP* out, DataT maxVal) const { out->value = maxVal; }
+
+  DI void init_key(DataT& out, LabelT idx) const { return; }
+  DI void init_key(KVP& out, LabelT idx) const { out.key = idx; }
+
+  DI DataT get_value(KVP& out) const
+  {
+    return out.value;
+    ;
+  }
+  DI DataT get_value(DataT& out) const { return out; }
 };
 
 template <typename LabelT, typename DataT>
@@ -141,6 +154,8 @@ __global__ __launch_bounds__(P::Nthreads, 2) void fusedL2NNkernel(OutT* min,
                                                                   OpT distance_op,
                                                                   FinalLambda fin_op)
 {
+// Compile the kernel body only for pre-Ampere architectures (below SM 8.0).
+#if __CUDA_ARCH__ < 800
   extern __shared__ char smem[];
 
   typedef KeyValuePair<IdxT, DataT> KVPair;
@@ -236,8 +251,29 @@ __global__ __launch_bounds__(P::Nthreads, 2) void fusedL2NNkernel(OutT* min,
         fin_op,
         rowEpilog_lambda);
   obj.run();
+#endif
 }
 
+// cg::reduce functor for FusedDistanceNN used in its cutlass version
+// to output the min distance value & key(loc id).
+// This is used in fused_distance_nn/predicated_tile_iterator_reduced_vec.h
+// store_with_byte_offset() passed to cg::reduce() & select_reduce.
+template <typename AccType, typename Index, typename OutType>
+struct kvp_cg_min_reduce_op {
+  typedef typename raft::KeyValuePair<Index, AccType> KVP;
+
+  __host__ __device__ kvp_cg_min_reduce_op() noexcept {};
+
+  using AccTypeT = AccType;
+  using IndexT   = Index;
+  // functor signature.
+  __host__ __device__ KVP operator()(KVP a, KVP b) const { return a.value < b.value ? a : b; }
+
+  __host__ __device__ AccType operator()(AccType a, AccType b) const { return min(a, b); }
+
+  __host__ __device__ bool isAmin(AccType a, AccType b) const { return a < b ? true : false; }
+};
+
 template <typename DataT,
           typename OutT,
           typename IdxT,
@@ -274,9 +310,8 @@ void fusedL2NNImpl(OutT* min,
     RAFT_CUDA_TRY(cudaGetLastError());
   }
 
-  constexpr size_t shmemSize = P::SmemSize + ((P::Mblk + P::Nblk) * sizeof(DataT));
-
-  using AccT = DataT;
+  namespace arch = raft::util::arch;
+  using AccT     = DataT;
   ops::l2_exp_distance_op<DataT, AccT, IdxT> distance_op{sqrt};
 
   raft::identity_op fin_op{};
@@ -290,11 +325,58 @@ void fusedL2NNImpl(OutT* min,
                                 decltype(distance_op),
                                 decltype(fin_op)>;
 
-  dim3 grid = launchConfigGenerator<P>(m, n, shmemSize, kernel);
-
-  kernel<<<grid, blk, shmemSize, stream>>>(
-    min, x, y, xn, yn, m, n, k, maxVal, workspace, redOp, pairRedOp, distance_op, fin_op);
-  RAFT_CUDA_TRY(cudaGetLastError());
+  // Get pointer to fp32 SIMT kernel to determine the runtime architecture of the
+  // current system. Other methods to determine the architecture (that do not
+  // require a pointer) can be error prone. See:
+  // https://github.com/NVIDIA/cub/issues/545
+  void* kernel_ptr   = reinterpret_cast<void*>(kernel);
+  auto runtime_arch  = arch::kernel_runtime_arch(kernel_ptr);
+  auto cutlass_range = arch::SM_range(arch::SM_80(), arch::SM_future());
+
+  if (cutlass_range.contains(runtime_arch)) {
+    // If device is SM_80 or later, use CUTLASS-based kernel.
+    using L2Op                  = raft::distance::detail::ops::l2_exp_cutlass_op<DataT, DataT>;
+    using kvp_cg_min_reduce_op_ = kvp_cg_min_reduce_op<DataT, IdxT, OutT>;
+    kvp_cg_min_reduce_op_ cg_reduce_op;
+    L2Op L2_dist_op(sqrt);
+
+    IdxT lda, ldb, ldd;
+    lda = k, ldb = k, ldd = n;
+
+    cutlassFusedDistanceNN<DataT,
+                           DataT,
+                           OutT,
+                           IdxT,
+                           P::Veclen,
+                           kvp_cg_min_reduce_op_,
+                           L2Op,
+                           ReduceOpT,
+                           KVPReduceOpT>(x,
+                                         y,
+                                         xn,
+                                         yn,
+                                         m,
+                                         n,
+                                         k,
+                                         lda,
+                                         ldb,
+                                         ldd,
+                                         min,
+                                         workspace,
+                                         cg_reduce_op,
+                                         L2_dist_op,
+                                         redOp,
+                                         pairRedOp,
+                                         stream);
+  } else {
+    // If the device is older than SM_80, use the fp32 SIMT kernel.
+    constexpr size_t shmemSize = P::SmemSize + ((P::Mblk + P::Nblk) * sizeof(DataT));
+    dim3 grid                  = launchConfigGenerator<P>(m, n, shmemSize, kernel);
+
+    kernel<<<grid, blk, shmemSize, stream>>>(
+      min, x, y, xn, yn, m, n, k, maxVal, workspace, redOp, pairRedOp, distance_op, fin_op);
+    RAFT_CUDA_TRY(cudaGetLastError());
+  }
 }
 
 }  // namespace detail
diff --git a/cpp/include/raft/distance/detail/pairwise_distance_cutlass_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_cutlass_base.cuh
index efcd5d9389..ccb3bd46bf 100644
--- a/cpp/include/raft/distance/detail/pairwise_distance_cutlass_base.cuh
+++ b/cpp/include/raft/distance/detail/pairwise_distance_cutlass_base.cuh
@@ -18,6 +18,7 @@
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wtautological-compare"
 
 // We define CUTLASS_NAMESPACE in case
 // RAFT cmake is not used
@@ -38,20 +39,11 @@
 #include <cutlass/tensor_view.h>
 
 #include <raft/distance/detail/distance_ops/cutlass.cuh>
+#include <raft/util/cutlass_utils.cuh>
 
 #include "./pairwise_distance_epilogue_elementwise.h"
 #include "./pairwise_distance_gemm.h"
 
-#define CUTLASS_CHECK(status)                                                                    \
-  {                                                                                              \
-    cutlass::Status error = status;                                                              \
-    if (error != cutlass::Status::kSuccess) {                                                    \
-      std::cerr << "Got cutlass error: " << cutlassGetStatusString(error) << " at: " << __LINE__ \
-                << std::endl;                                                                    \
-      exit(EXIT_FAILURE);                                                                        \
-    }                                                                                            \
-  }
-
 namespace raft {
 namespace distance {
 namespace detail {
@@ -164,14 +156,13 @@ std::enable_if_t<ops::has_cutlass_op<OpT>::value> cutlassDistanceKernel(const Da
   // Instantiate CUTLASS kernel depending on templates
   cutlassDist cutlassDist_op;
   // Check the problem size is supported or not
-  cutlass::Status status = cutlassDist_op.can_implement(arguments);
-  CUTLASS_CHECK(status);
+  RAFT_CUTLASS_TRY(cutlassDist_op.can_implement(arguments));
+
   // Initialize CUTLASS kernel with arguments and workspace pointer
-  status = cutlassDist_op.initialize(arguments, workspace.data(), stream);
-  CUTLASS_CHECK(status);
+  RAFT_CUTLASS_TRY(cutlassDist_op.initialize(arguments, workspace.data(), stream));
+
   // Launch initialized CUTLASS kernel
-  status = cutlassDist_op();
-  CUTLASS_CHECK(status);
+  RAFT_CUTLASS_TRY(cutlassDist_op());
 }
 
 };  // namespace detail
diff --git a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
index ebe6d0c80a..cd748b9e6b 100644
--- a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
+++ b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h
@@ -284,11 +284,15 @@ class PredicatedTileIteratorNormVec {
           CUTLASS_PRAGMA_UNROLL
           for (int column = 0; column < ThreadMap::Iterations::kColumn; ++column) {
             bool guard = row_guard && mask_.predicates[column];
-
-            cutlass::arch::global_load<AccessType, sizeof(AccessType)>(
-              frag_ptr[frag_row_idx * ThreadMap::Iterations::kColumn + column],
-              (void*)&memory_pointer[0],
-              guard);
+            if (column == 0) {
+              cutlass::arch::global_load<AccessType, sizeof(AccessType)>(
+                frag_ptr[frag_row_idx * ThreadMap::Iterations::kColumn + column],
+                (void*)&memory_pointer[0],
+                guard);
+            } else {
+              frag_ptr[frag_row_idx * ThreadMap::Iterations::kColumn + column] =
+                frag_ptr[frag_row_idx * ThreadMap::Iterations::kColumn];
+            }
           }
 
           if (row + 1 < ThreadMap::Iterations::kRow) {
diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh
index adcb566cea..f089cbea83 100644
--- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh
+++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh
@@ -61,10 +61,13 @@ struct FixConnectivitiesRedOp {
   value_idx* colors;
   value_idx m;
 
+  // default constructor for cutlass
+  DI FixConnectivitiesRedOp() : colors(0), m(0) {}
+
   FixConnectivitiesRedOp(value_idx* colors_, value_idx m_) : colors(colors_), m(m_){};
 
   typedef typename raft::KeyValuePair<value_idx, value_t> KVP;
-  DI void operator()(value_idx rit, KVP* out, const KVP& other)
+  DI void operator()(value_idx rit, KVP* out, const KVP& other) const
   {
     if (rit < m && other.value < out->value && colors[rit] != colors[other.key]) {
       out->key   = other.key;
@@ -72,9 +75,7 @@ struct FixConnectivitiesRedOp {
     }
   }
 
-  DI KVP
-
-  operator()(value_idx rit, const KVP& a, const KVP& b)
+  DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) const
   {
     if (rit < m && a.value < b.value && colors[rit] != colors[a.key]) {
       return a;
@@ -82,12 +83,19 @@ struct FixConnectivitiesRedOp {
       return b;
   }
 
-  DI void init(value_t* out, value_t maxVal) { *out = maxVal; }
-  DI void init(KVP* out, value_t maxVal)
+  DI void init(value_t* out, value_t maxVal) const { *out = maxVal; }
+  DI void init(KVP* out, value_t maxVal) const
   {
     out->key   = -1;
     out->value = maxVal;
   }
+
+  DI void init_key(value_t& out, value_idx idx) const { return; }
+  DI void init_key(KVP& out, value_idx idx) const { out.key = idx; }
+
+  DI value_t get_value(KVP& out) const { return out.value; }
+
+  DI value_t get_value(value_t& out) const { return out; }
 };
 
 /**
diff --git a/cpp/include/raft/util/cutlass_utils.cuh b/cpp/include/raft/util/cutlass_utils.cuh
new file mode 100644
index 0000000000..da402c9427
--- /dev/null
+++ b/cpp/include/raft/util/cutlass_utils.cuh
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cutlass/cutlass.h>
+#include <raft/core/error.hpp>
+
+namespace raft {
+
+/**
+ * @brief Exception thrown when a CUTLASS error is encountered.
+ */
+struct cutlass_error : public raft::exception {
+  explicit cutlass_error(char const* const message) : raft::exception(message) {}
+  explicit cutlass_error(std::string const& message) : raft::exception(message) {}
+};
+
+}  // namespace raft
+
+/**
+ * @brief Error checking macro for CUTLASS functions.
+ *
+ * Invokes a CUTLASS function call, if the call does not return cutlass::Status::kSuccess,
+ * throws an exception detailing the CUTLASS error that occurred.
+ *
+ */
+#define RAFT_CUTLASS_TRY(call)                        \
+  do {                                                \
+    cutlass::Status const status = call;              \
+    if (status != cutlass::Status::kSuccess) {        \
+      std::string msg{};                              \
+      SET_ERROR_MSG(msg,                              \
+                    "CUTLASS error encountered at: ", \
+                    "call='%s', Reason=%s",           \
+                    #call,                            \
+                    cutlassGetStatusString(status));  \
+      throw raft::cutlass_error(msg);                 \
+    }                                                 \
+  } while (0)
diff --git a/cpp/test/distance/fused_l2_nn.cu b/cpp/test/distance/fused_l2_nn.cu
index e3f3bf3324..e807256f67 100644
--- a/cpp/test/distance/fused_l2_nn.cu
+++ b/cpp/test/distance/fused_l2_nn.cu
@@ -276,6 +276,8 @@ const std::vector<Inputs<float>> inputsf = {
   {0.001f, 128, 128, 65, 1234ULL},
   {0.001f, 64, 128, 129, 1234ULL},
   {0.006f, 1805, 134, 2, 1234ULL},
+  {0.006f, 8192, 1024, 64, 1234ULL},
+  {0.006f, 8192, 1025, 64, 1234ULL},
 
   // Repeat with smaller values of k
   {0.006f, 32, 32, 1, 1234ULL},
@@ -305,6 +307,8 @@ const std::vector<Inputs<float>> inputsf = {
   {0.001f, 128, 128, 23, 1234ULL},
   {0.00001, 64, 128, 24, 1234ULL},
   {0.001f, 1805, 134, 25, 1234ULL},
+  {0.006f, 8192, 1024, 25, 1234ULL},
+  {0.006f, 8192, 1024, 66, 1234ULL},
 };
 typedef FusedL2NNTest<float, false> FusedL2NNTestF_Sq;
 TEST_P(FusedL2NNTestF_Sq, Result)
@@ -339,7 +343,7 @@ const std::vector<Inputs<double>> inputsd = {
   {0.00001, 128, 32, 33, 1234ULL},  {0.00001, 128, 64, 33, 1234ULL},
   {0.00001, 128, 128, 65, 1234ULL}, {0.00001, 64, 128, 129, 1234ULL},
 
-  {0.00001, 1805, 134, 2, 1234ULL},
+  {0.00001, 1805, 134, 2, 1234ULL}, {0.00001, 8192, 1024, 25, 1234ULL},
 };
 typedef FusedL2NNTest<double, false> FusedL2NNTestD_Sq;
 TEST_P(FusedL2NNTestD_Sq, Result)

From d99d249dc9231837333cec60f9a41689262073d3 Mon Sep 17 00:00:00 2001
From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com>
Date: Wed, 17 May 2023 00:26:40 +0200
Subject: [PATCH 57/78] ivf-flat: limit the workspace size of the search via
 batching (#1515)

closes https://github.com/rapidsai/raft/issues/1343

Authors:
  - Artem M. Chirkin (https://github.com/achirkin)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1515
---
 .../neighbors/detail/ivf_flat_search-inl.cuh  | 33 ++++++++++++-------
 cpp/test/neighbors/ann_ivf_flat.cuh           |  3 +-
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
index b4711fa14b..66ad9682d7 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh
@@ -214,22 +214,33 @@ inline void search(raft::resources const& handle,
                "n_probes (number of clusters to probe in the search) must be positive.");
   auto n_probes = std::min<uint32_t>(params.n_probes, index.n_lists());
 
-  auto pool_guard = raft::get_pool_memory_resource(mr, n_queries * n_probes * k * 16);
+  // a batch size heuristic: try to keep the workspace within the specified size
+  constexpr uint32_t kExpectedWsSize = 1024 * 1024 * 1024;
+  const uint32_t max_queries =
+    std::min<uint32_t>(n_queries,
+                       raft::div_rounding_up_safe<uint64_t>(
+                         kExpectedWsSize, 16ull * uint64_t{n_probes} * k + 4ull * index.dim()));
+
+  auto pool_guard = raft::get_pool_memory_resource(mr, max_queries * n_probes * k * 16);
   if (pool_guard) {
     RAFT_LOG_DEBUG("ivf_flat::search: using pool memory resource with initial size %zu bytes",
                    n_queries * n_probes * k * 16ull);
   }
 
-  return search_impl<T, float, IdxT>(handle,
-                                     index,
-                                     queries,
-                                     n_queries,
-                                     k,
-                                     n_probes,
-                                     raft::distance::is_min_close(index.metric()),
-                                     neighbors,
-                                     distances,
-                                     mr);
+  for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) {
+    uint32_t queries_batch = min(max_queries, n_queries - offset_q);
+
+    search_impl<T, float, IdxT>(handle,
+                                index,
+                                queries + offset_q * index.dim(),
+                                queries_batch,
+                                k,
+                                n_probes,
+                                raft::distance::is_min_close(index.metric()),
+                                neighbors + offset_q * k,
+                                distances + offset_q * k,
+                                mr);
+  }
 }
 
 }  // namespace raft::neighbors::ivf_flat::detail
diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh
index 3f10f2cf40..1c9406e8a9 100644
--- a/cpp/test/neighbors/ann_ivf_flat.cuh
+++ b/cpp/test/neighbors/ann_ivf_flat.cuh
@@ -335,6 +335,7 @@ const std::vector<AnnIvfFlatInputs<int64_t>> inputs = {
 
   // test splitting the big query batches  (> max gridDim.y) into smaller batches
   {100000, 1024, 32, 10, 64, 64, raft::distance::DistanceType::InnerProduct, false},
+  {1000000, 1024, 32, 10, 256, 256, raft::distance::DistanceType::InnerProduct, false},
   {98306, 1024, 32, 10, 64, 64, raft::distance::DistanceType::InnerProduct, true},
 
   // test radix_sort for getting the cluster selection
@@ -355,4 +356,4 @@ const std::vector<AnnIvfFlatInputs<int64_t>> inputs = {
    raft::distance::DistanceType::InnerProduct,
    false}};
 
-}  // namespace raft::neighbors::ivf_flat
\ No newline at end of file
+}  // namespace raft::neighbors::ivf_flat

From 7018118fae553420ae2428b1011d4272dccabb3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Malte=20F=C3=B6rster?=
 <97973773+mfoerste4@users.noreply.github.com>
Date: Wed, 17 May 2023 22:20:06 +0200
Subject: [PATCH 58/78] [REVIEW] make raft::cache::Cache protected to allow
 overrides (#1522)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are planning to do some experiments with partial cache updates within cuml. In order to implement overrides for the given public functions and to allow re-usability for the cache internals we need to move the cache members to protected.

CC @cjnolet @tfeher

Authors:
  - Malte Förster (https://github.com/mfoerste4)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1522
---
 cpp/include/raft/util/cache.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/raft/util/cache.cuh b/cpp/include/raft/util/cache.cuh
index 64b180de2a..a9cfe64568 100644
--- a/cpp/include/raft/util/cache.cuh
+++ b/cpp/include/raft/util/cache.cuh
@@ -361,7 +361,7 @@ class Cache {
    */
   int GetSize() const { return cached_keys.size(); }
 
- private:
+ protected:
   int n_vec;            //!< Number of elements in a cached vector
   float cache_size;     //!< in MiB
   int n_cache_sets;     //!< number of cache sets

From 5392a91e2025a6c636a415c618dd854567bc2bd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Malte=20F=C3=B6rster?=
 <97973773+mfoerste4@users.noreply.github.com>
Date: Wed, 17 May 2023 22:20:18 +0200
Subject: [PATCH 59/78] [REVIEW] Fix padding assertion in sparse Gram
 evaluation (#1521)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An assertion could fail if the result kernel matrix is column major but has only a single row. In this case the old implementation identified the matrix as row major instead, and the subsequent stride check failed.

Now the check is extended to identify both layouts. The only case where the layout cannot be determined exactly is a 1x1 matrix, in which case the following code just picks column major as the default.
CC @cjnolet @tfeher

Authors:
  - Malte Förster (https://github.com/mfoerste4)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1521
---
 .../raft/distance/detail/kernels/gram_matrix.cuh  | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
index 7cfc75cd96..9b079a8539 100644
--- a/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
+++ b/cpp/include/raft/distance/detail/kernels/gram_matrix.cuh
@@ -464,18 +464,19 @@ class GramMatrixBase {
               csr_input_matrix_view_t<math_t> x2,
               dense_output_matrix_view_t<math_t> out)
   {
-    // check is_row_major consistency
-    bool is_row_major = get_is_row_major(out);
-    int ld_out        = is_row_major ? out.stride(0) : out.stride(1);
-    int minor_out     = is_row_major ? out.extent(1) : out.extent(0);
-    ASSERT(ld_out == minor_out, "Sparse linear Kernel distance does not support ld_out parameter");
+    // check layout consistency (w.r.t. strides a matrix might be both row & col major)
+    bool is_row_major_nopad = get_is_row_major(out) && out.stride(0) == out.extent(1);
+    bool is_col_major_nopad = get_is_col_major(out) && out.stride(1) == out.extent(0);
+
+    ASSERT(is_row_major_nopad || is_col_major_nopad,
+           "Sparse linear Kernel distance does not support ld_out parameter");
 
     auto x1_structure = x1.structure_view();
     auto x2_structure = x2.structure_view();
     raft::sparse::distance::distances_config_t<int, math_t> dist_config(handle);
 
-    // switch a,b based on is_row_major
-    if (!is_row_major) {
+    // switch a,b based on data layout
+    if (is_col_major_nopad) {
       dist_config.a_nrows   = x2_structure.get_n_rows();
       dist_config.a_ncols   = x2_structure.get_n_cols();
       dist_config.a_nnz     = x2_structure.get_nnz();

From 618dc23cff42b546d70e875875794b22306df4fc Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Wed, 17 May 2023 16:06:44 -0700
Subject: [PATCH 60/78] Learn heuristic to pick fastest select_k algorithm
 (#1523)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This uses the select_k dataset from https://github.com/rapidsai/raft/pull/1497 to learn a heuristic for picking the fastest select_k variant based on the rows/cols/k of the input. This heuristic is modelled as a DecisionTree, which is automatically exported as C++ code that is compiled into RAFT. This lets us learn a function to pick the fastest select_k method - which requires only a few if statements in C++ code, making it very cheap to evaluate.

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1523
---
 cpp/bench/prims/matrix/select_k.cu            |  14 +-
 .../raft/matrix/detail/select_k-inl.cuh       | 120 +++-
 .../raft_internal/matrix/select_k.cuh         |   1 +
 .../select_k/algorithm_selection.ipynb        | 443 +++++++++++++
 .../select_k/generate_heuristic.ipynb         | 587 ++++++++++++++++++
 .../heuristics/select_k/generate_plots.ipynb  | 352 +++++++++++
 .../heuristics/select_k/select_k_dataset.py   | 114 ++++
 7 files changed, 1620 insertions(+), 11 deletions(-)
 create mode 100644 cpp/scripts/heuristics/select_k/algorithm_selection.ipynb
 create mode 100644 cpp/scripts/heuristics/select_k/generate_heuristic.ipynb
 create mode 100644 cpp/scripts/heuristics/select_k/generate_plots.ipynb
 create mode 100644 cpp/scripts/heuristics/select_k/select_k_dataset.py

diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
index 22ec998f4f..1bff66cac4 100644
--- a/cpp/bench/prims/matrix/select_k.cu
+++ b/cpp/bench/prims/matrix/select_k.cu
@@ -41,7 +41,7 @@ using namespace raft::bench;  // NOLINT
 template <typename KeyT, typename IdxT, select::Algo Algo>
 struct selection : public fixture {
   explicit selection(const select::params& p)
-    : fixture(true),
+    : fixture(p.use_memory_pool),
       params_(p),
       in_dists_(p.batch_size * p.len, stream),
       in_ids_(p.batch_size * p.len, stream),
@@ -193,7 +193,8 @@ SELECTION_REGISTER(double, int64_t, kWarpDistributedShm);     // NOLINT
     using SelectK = selection<KeyT, IdxT, select::Algo::A>;                               \
     std::stringstream name;                                                               \
     name << "SelectKDataset/" << #KeyT "/" #IdxT "/" #A << "/" << input.batch_size << "/" \
-         << input.len << "/" << input.k << "/" << input.use_index_input;                  \
+         << input.len << "/" << input.k << "/" << input.use_index_input << "/"            \
+         << input.use_memory_pool;                                                        \
     auto* b = ::benchmark::internal::RegisterBenchmarkInternal(                           \
       new raft::bench::internal::Fixture<SelectK, select::params>(name.str(), input));    \
     b->UseManualTime();                                                                   \
@@ -266,5 +267,14 @@ void add_select_k_dataset_benchmarks()
     SELECTION_REGISTER_INPUT(float, int64_t, input);
     SELECTION_REGISTER_INPUT(float, uint32_t, input);
   }
+
+  // also try again without a memory pool to see if there are significant differences
+  for (auto input : inputs) {
+    input.use_memory_pool = false;
+    SELECTION_REGISTER_INPUT(double, int64_t, input);
+    SELECTION_REGISTER_INPUT(double, uint32_t, input);
+    SELECTION_REGISTER_INPUT(float, int64_t, input);
+    SELECTION_REGISTER_INPUT(float, uint32_t, input);
+  }
 }
 }  // namespace raft::matrix
diff --git a/cpp/include/raft/matrix/detail/select_k-inl.cuh b/cpp/include/raft/matrix/detail/select_k-inl.cuh
index 20c2fb119d..dba2d1d841 100644
--- a/cpp/include/raft/matrix/detail/select_k-inl.cuh
+++ b/cpp/include/raft/matrix/detail/select_k-inl.cuh
@@ -21,11 +21,101 @@
 
 #include <raft/core/nvtx.hpp>
 
+#include <raft/neighbors/detail/selection_faiss.cuh>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 
 namespace raft::matrix::detail {
 
+// this is a subset of algorithms, chosen by running the algorithm_selection
+// notebook in cpp/scripts/heuristics/select_k
+enum class Algo { kRadix11bits, kWarpDistributedShm, kFaissBlockSelect };
+
+/**
+ * Predict the fastest select_k algorithm from the input dimensions and k.
+ *
+ * @param rows number of rows in the input (select_k passes batch_size)
+ * @param cols number of columns in the input (select_k passes len)
+ * @param k    number of elements to select from each row
+ * @return the Algo value predicted to be the fastest for these inputs
+ *
+ * NOTE: the body is auto-generated: a DecisionTree trained on thousands of
+ * trial runs over rows/cols/k is exported to C++ and pasted here. Running the
+ * 'generate_heuristic' notebook in cpp/scripts/heuristics/select_k replaces it.
+ */
+inline Algo choose_select_k_algorithm(size_t rows, size_t cols, int k)
+{
+  if (k > 134) {
+    if (k > 256) {
+      if (k > 809) {
+        return Algo::kRadix11bits;
+      } else {
+        if (rows > 124) {
+          if (cols > 63488) {
+            return Algo::kFaissBlockSelect;
+          } else {
+            return Algo::kRadix11bits;
+          }
+        } else {
+          return Algo::kRadix11bits;
+        }
+      }
+    } else {
+      if (cols > 678736) {
+        return Algo::kWarpDistributedShm;
+      } else {
+        return Algo::kRadix11bits;
+      }
+    }
+  } else {
+    if (cols > 13776) {
+      if (rows > 335) {
+        if (k > 1) {
+          if (rows > 546) {
+            return Algo::kWarpDistributedShm;
+          } else {
+            if (k > 17) {
+              return Algo::kWarpDistributedShm;
+            } else {
+              return Algo::kFaissBlockSelect;
+            }
+          }
+        } else {
+          return Algo::kFaissBlockSelect;
+        }
+      } else {
+        if (k > 44) {
+          if (cols > 1031051) {
+            return Algo::kWarpDistributedShm;
+          } else {
+            if (rows > 22) {
+              return Algo::kWarpDistributedShm;
+            } else {
+              return Algo::kRadix11bits;
+            }
+          }
+        } else {
+          return Algo::kWarpDistributedShm;
+        }
+      }
+    } else {
+      if (k > 1) {
+        if (rows > 188) {
+          return Algo::kWarpDistributedShm;
+        } else {
+          if (k > 72) {
+            return Algo::kRadix11bits;
+          } else {
+            return Algo::kWarpDistributedShm;
+          }
+        }
+      } else {
+        return Algo::kFaissBlockSelect;
+      }
+    }
+  }
+}
+
 /**
  * Select k smallest or largest key/values from each row in the input data.
  *
@@ -77,15 +167,27 @@ void select_k(const T* in_val,
 {
   common::nvtx::range<common::nvtx::domain::raft> fun_scope(
     "matrix::select_k(batch_size = %zu, len = %zu, k = %d)", batch_size, len, k);
-  // TODO (achirkin): investigate the trade-off for a wider variety of inputs.
-  const bool radix_faster = batch_size >= 64 && len >= 102400 && k >= 128;
-  if (k <= select::warpsort::kMaxCapacity && !radix_faster) {
-    select::warpsort::select_k<T, IdxT>(
-      in_val, in_idx, batch_size, len, k, out_val, out_idx, select_min, stream, mr);
-  } else {
-    select::radix::select_k<T, IdxT, (sizeof(T) >= 4 ? 11 : 8), 512>(
-      in_val, in_idx, batch_size, len, k, out_val, out_idx, select_min, true, stream, mr);
+
+  auto algo = choose_select_k_algorithm(batch_size, len, k);
+  switch (algo) {
+    case Algo::kRadix11bits:
+      return detail::select::radix::select_k<T, IdxT, 11, 512>(in_val,
+                                                               in_idx,
+                                                               batch_size,
+                                                               len,
+                                                               k,
+                                                               out_val,
+                                                               out_idx,
+                                                               select_min,
+                                                               true,  // fused_last_filter
+                                                               stream);
+    case Algo::kWarpDistributedShm:
+      return detail::select::warpsort::
+        select_k_impl<T, IdxT, detail::select::warpsort::warp_sort_distributed_ext>(
+          in_val, in_idx, batch_size, len, k, out_val, out_idx, select_min, stream);
+    case Algo::kFaissBlockSelect:
+      return neighbors::detail::select_k(
+        in_val, in_idx, batch_size, len, out_val, out_idx, select_min, k, stream);
   }
 }
-
 }  // namespace raft::matrix::detail
diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh
index ac9a0bb717..013a61886f 100644
--- a/cpp/internal/raft_internal/matrix/select_k.cuh
+++ b/cpp/internal/raft_internal/matrix/select_k.cuh
@@ -32,6 +32,7 @@ struct params {
   bool select_min;
   bool use_index_input       = true;
   bool use_same_leading_bits = false;
+  bool use_memory_pool       = true;
 };
 
 inline auto operator<<(std::ostream& os, const params& ss) -> std::ostream&
diff --git a/cpp/scripts/heuristics/select_k/algorithm_selection.ipynb b/cpp/scripts/heuristics/select_k/algorithm_selection.ipynb
new file mode 100644
index 0000000000..674ce3c906
--- /dev/null
+++ b/cpp/scripts/heuristics/select_k/algorithm_selection.ipynb
@@ -0,0 +1,443 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6bd615f3-0e28-4ea8-8878-49fd62d833cd",
+   "metadata": {},
+   "source": [
+    "# Select the selection algorithms\n",
+    "\n",
+    "The idea here is that we can simplify the decision logic, reduce the binary size\n",
+    "and speed up the compilation time by only including a subset of selection algorithms.\n",
+    "We're aiming to get algorithms that perform well in different situations, and complement\n",
+    "each other - so to do this, we're iteratively removing the worst performing algorithm,\n",
+    "after which algorithms are re-evaluated on their speedups relative to the remaining\n",
+    "algorithms. This gets us a minimum spanning set of selection algorithms that performs\n",
+    "well over diverse inputs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "fbf08b4e-7a91-4c4b-8320-ded040d3f827",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>key_type</th>\n",
+       "      <th>index_type</th>\n",
+       "      <th>algo</th>\n",
+       "      <th>row</th>\n",
+       "      <th>col</th>\n",
+       "      <th>k</th>\n",
+       "      <th>use_index_input</th>\n",
+       "      <th>use_memory_pool</th>\n",
+       "      <th>time</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix8bits</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000050</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000033</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bitsExtraPass</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000033</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kWarpImmediate</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000022</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kWarpFiltered</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000024</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179963</th>\n",
+       "      <td>float</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.001018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179964</th>\n",
+       "      <td>float</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bitsExtraPass</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.001018</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179965</th>\n",
+       "      <td>float</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix8bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179966</th>\n",
+       "      <td>float</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000072</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179967</th>\n",
+       "      <td>float</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bitsExtraPass</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000072</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>179968 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       key_type index_type                   algo   row   col     k  \\\n",
+       "0        double    int64_t            kRadix8bits     1  1024     1   \n",
+       "1        double    int64_t           kRadix11bits     1  1024     1   \n",
+       "2        double    int64_t  kRadix11bitsExtraPass     1  1024     1   \n",
+       "3        double    int64_t         kWarpImmediate     1  1024     1   \n",
+       "4        double    int64_t          kWarpFiltered     1  1024     1   \n",
+       "...         ...        ...                    ...   ...   ...   ...   \n",
+       "179963    float   uint32_t           kRadix11bits  1075  2042  8175   \n",
+       "179964    float   uint32_t  kRadix11bitsExtraPass  1075  2042  8175   \n",
+       "179965    float   uint32_t            kRadix8bits  1075  2042  8175   \n",
+       "179966    float   uint32_t           kRadix11bits  1075  2042  8175   \n",
+       "179967    float   uint32_t  kRadix11bitsExtraPass  1075  2042  8175   \n",
+       "\n",
+       "        use_index_input  use_memory_pool      time  \n",
+       "0                     0                0  0.000050  \n",
+       "1                     0                0  0.000033  \n",
+       "2                     0                0  0.000033  \n",
+       "3                     0                0  0.000022  \n",
+       "4                     0                0  0.000024  \n",
+       "...                 ...              ...       ...  \n",
+       "179963                0                0  0.001018  \n",
+       "179964                0                0  0.001018  \n",
+       "179965                0                1  0.000059  \n",
+       "179966                0                1  0.000072  \n",
+       "179967                0                1  0.000072  \n",
+       "\n",
+       "[179968 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from select_k_dataset import load_dataframe, get_dataset\n",
+    "\n",
+    "df = load_dataframe(\"select_k_times.json\")\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c0d9fed5-35c3-4b0b-987a-973e93e0c59c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from collections import Counter\n",
+    "\n",
+    "def rank_algos(df, use_relative_speedup=False):\n",
+    "    _, y, weights = get_dataset(df)\n",
+    "    times = Counter()\n",
+    "    for algo, speedup in zip(y, weights):\n",
+    "        times[algo] += speedup if use_relative_speedup else 1\n",
+    "    return sorted(times.items(), key=lambda x:-x[-1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "56c5dd8e-6f44-4ef3-b825-1d5a5d6698a2",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('kRadix11bits', 7267),\n",
+       " ('kWarpDistributedShm', 6861),\n",
+       " ('kFaissBlockSelect', 3620),\n",
+       " ('kRadix8bits', 3229),\n",
+       " ('kWarpDistributed', 2619),\n",
+       " ('kWarpImmediate', 2584),\n",
+       " ('kRadix11bitsExtraPass', 2260),\n",
+       " ('kWarpFiltered', 490)]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# show the number of times each algorithm is fastest for a given k/# of rows/# of cols / dtype / memory pool etc\n",
+    "rank_algos(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ec63f794-0bdf-4afe-92a8-3fa8ab7a1648",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('kRadix11bits', 3591),\n",
+       " ('kWarpDistributedShm', 3589),\n",
+       " ('kFaissBlockSelect', 2006),\n",
+       " ('kWarpImmediate', 1552),\n",
+       " ('kWarpDistributed', 1448),\n",
+       " ('kRadix11bitsExtraPass', 1338),\n",
+       " ('kRadix8bits', 460),\n",
+       " ('kWarpFiltered', 290)]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# kRadix8bits seems to have a performance issue with 64 bit index types, it is one\n",
+    "# of the worst performing algorithms for 64bit indices, but one of the top 3 for 32 bit\n",
+    "rank_algos(df[df.index_type == \"int64_t\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "9b7de19f-ddb6-4fa6-b423-db384428d701",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('kRadix11bits', 3676),\n",
+       " ('kWarpDistributedShm', 3272),\n",
+       " ('kRadix8bits', 2769),\n",
+       " ('kFaissBlockSelect', 1614),\n",
+       " ('kWarpDistributed', 1171),\n",
+       " ('kWarpImmediate', 1032),\n",
+       " ('kRadix11bitsExtraPass', 922),\n",
+       " ('kWarpFiltered', 200)]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rank_algos(df[df.index_type == \"uint32_t\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "bc0a10ea-652b-4822-8587-514c8f0348c3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "selected {'kRadix11bits', 'kWarpDistributedShm', 'kFaissBlockSelect'}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[('kRadix11bits', 12736),\n",
+       " ('kWarpDistributedShm', 12317),\n",
+       " ('kFaissBlockSelect', 3877)]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# do an algorithm selection pass, repeatedly remove the lowest performing algorithm\n",
+    "#\n",
+    "# The idea here is that we can simplify the decision logic, reduce the binary size\n",
+    "# and speed up the compilation time by only including a subset of selection algorithms.\n",
+    "# we're aiming to get algorithms that perform well in different situations, and complement\n",
+    "# each other - so to do this, we're iteratively removing the worst performing algorithm,\n",
+    "# after which algorithms are re-evaluated on their speedups relative to the remaining\n",
+    "# algorithms. This gets us a minimum spanning set of selection algorithms that performs\n",
+    "# well over diverse inputs.\n",
+    "#\n",
+    "# note: the lowest performing algorithm here might actually be pretty good, but\n",
+    "# just not provide much benefit over another similar algorithm. \n",
+    "# As an example, kWarpDistributed  is an excellent selection algorithm, but in testing \n",
+    "# kWarpDistributedShm is slightly faster than it in situations where it does well, \n",
+    "# meaning that it gets removed early on in this loop\n",
+    "current = df[df.use_memory_pool == True]\n",
+    "algos = set(df.algo)\n",
+    "\n",
+    "# we're arbitrarily getting this down to 3 selection algorithms\n",
+    "while len(algos) > 3:\n",
+    "    times = rank_algos(current, use_relative_speedup=False)\n",
+    "    algo, speedup = times[-1]\n",
+    "    algos.remove(algo)\n",
+    "    current = df[df.algo.isin(algos)]\n",
+    "\n",
+    "print(\"selected\", algos)\n",
+    "rank_algos(current)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "028ebbb1-5289-4104-a13c-494c74742087",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# experimenting with different subsets of index type / dtype / use memory seems\n",
+    "# to pretty consistently show that kRadix11bits / kWarpDistributedShm / kFaissBlockSelect\n",
+    "# all get selected here"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/cpp/scripts/heuristics/select_k/generate_heuristic.ipynb b/cpp/scripts/heuristics/select_k/generate_heuristic.ipynb
new file mode 100644
index 0000000000..12b06bdd47
--- /dev/null
+++ b/cpp/scripts/heuristics/select_k/generate_heuristic.ipynb
@@ -0,0 +1,587 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7e298e01-190d-4839-9d44-1360c9cd2b75",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Learning to select the fastest `select_k` algorithm\n",
+    "\n",
+    "Using the dataset of algorithm times generated by `generate_dataset.py`, this notebook trains a\n",
+    "DecisionTree model that will predict the fastest algorithm based on the number of rows and columns in the input\n",
+    "array, as well as the `k` value."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "40fef43f-194f-4b06-9eba-fdd8af42d214",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from select_k_dataset import load_dataframe, get_dataset\n",
+    "import sklearn.tree\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "56765f40-96ce-46c6-bce8-ab782cd72b6e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>key_type</th>\n",
+       "      <th>index_type</th>\n",
+       "      <th>algo</th>\n",
+       "      <th>row</th>\n",
+       "      <th>col</th>\n",
+       "      <th>k</th>\n",
+       "      <th>use_index_input</th>\n",
+       "      <th>use_memory_pool</th>\n",
+       "      <th>time</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000024</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kWarpDistributedShm</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000010</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kFaissBlockSelect</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000005</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>double</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000008</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>double</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kWarpDistributedShm</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000010</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179942</th>\n",
+       "      <td>float</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>7586</td>\n",
+       "      <td>162460</td>\n",
+       "      <td>8149</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.021265</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179948</th>\n",
+       "      <td>double</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000317</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179954</th>\n",
+       "      <td>double</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000154</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179960</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000138</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179966</th>\n",
+       "      <td>float</td>\n",
+       "      <td>uint32_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000072</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>34909 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       key_type index_type                 algo   row     col     k  \\\n",
+       "9        double    int64_t         kRadix11bits     1    1024     1   \n",
+       "14       double    int64_t  kWarpDistributedShm     1    1024     1   \n",
+       "15       double    int64_t    kFaissBlockSelect     1    1024     1   \n",
+       "25       double   uint32_t         kRadix11bits     1    1024     1   \n",
+       "30       double   uint32_t  kWarpDistributedShm     1    1024     1   \n",
+       "...         ...        ...                  ...   ...     ...   ...   \n",
+       "179942    float   uint32_t         kRadix11bits  7586  162460  8149   \n",
+       "179948   double    int64_t         kRadix11bits  1075    2042  8175   \n",
+       "179954   double   uint32_t         kRadix11bits  1075    2042  8175   \n",
+       "179960    float    int64_t         kRadix11bits  1075    2042  8175   \n",
+       "179966    float   uint32_t         kRadix11bits  1075    2042  8175   \n",
+       "\n",
+       "        use_index_input  use_memory_pool      time  \n",
+       "9                     0                1  0.000024  \n",
+       "14                    0                1  0.000010  \n",
+       "15                    0                1  0.000005  \n",
+       "25                    0                1  0.000008  \n",
+       "30                    0                1  0.000010  \n",
+       "...                 ...              ...       ...  \n",
+       "179942                0                1  0.021265  \n",
+       "179948                0                1  0.000317  \n",
+       "179954                0                1  0.000154  \n",
+       "179960                0                1  0.000138  \n",
+       "179966                0                1  0.000072  \n",
+       "\n",
+       "[34909 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# load up the timings from the MATRIX_BENCH script into a pandas dataframe\n",
+    "df = load_dataframe(\"select_k_times.json\")\n",
+    "\n",
+    "# we're limiting down to 3 different select_k methods - chosen by \n",
+    "# the 'algorithm_selection.ipynb' script here\n",
+    "df = df[df.algo.isin([\"kRadix11bits\", \"kWarpDistributedShm\", \"kFaissBlockSelect\"])]\n",
+    "\n",
+    "# we're also assuming we have a memory pool for now\n",
+    "df = df[(df.use_memory_pool == True)]\n",
+    "# df = df[(df.index_type == 'int64_t') & (df.key_type == 'float')]\n",
+    "\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "1cc0bbbc-47a7-4e41-acf1-3bbcee91e1b1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((12295, 4), (2170, 4))"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# break down into a train/test set\n",
+    "X, y, weights = get_dataset(df)\n",
+    "train_test_sets = sklearn.model_selection.train_test_split(X, y, weights, test_size=0.15, random_state=1)\n",
+    "X_train, X_test, y_train, y_test, weights_train, weights_test = train_test_sets\n",
+    "X_train.shape, X_test.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a46cfff4-97db-46da-8c00-89c2b0557b91",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier(max_depth=6, max_leaf_nodes=20)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier(max_depth=6, max_leaf_nodes=20)</pre></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "DecisionTreeClassifier(max_depth=6, max_leaf_nodes=20)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = sklearn.tree.DecisionTreeClassifier(max_depth=6, max_leaf_nodes=20)\n",
+    "model.fit(X_train, y_train) #, weights_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "32dc14a0-2c9e-414f-9443-c39ad5ab2b9a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9952052831917203"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.score(X_train, y_train, weights_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "19ffa733-fda7-4758-92da-8e9ea5d8ef9c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9998545056217218"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.score(X_test, y_test, weights_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "a55279d6-7b7e-4197-afb0-ff10efcb1d32",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# print(sklearn.tree.export_text(model, feature_names=[\"k\", \"rows\", \"cols\"]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "058e488c-f0dd-40c8-a217-e8767d6439ec",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAABAIAAAOwCAYAAACkhdZFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yN9///8cdJEKN2bEWNLJFEYkRIKzFrxai9aflQbUOL2qpW7VSLqL2VGjVK7b1JjKC09t5CEjKu3x9+PV9pYoeT8bzfbrndnPO+xvOcnp73dV7X+3pfJsMwDEREREREREQkRbCydAAREREREREReXdUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFUCBARERERERFJQVQIEBEREREREUlBVAgQERERERERSUFSWTqAiIiIvL7z589z8+ZNS8eQFMjW1pYCBQpYOoaIiLwGFQJERESSqPPnz+Po6EhYWJilo0gKlD59eo4fP65igIhIEqRCgIiISBJ18+ZNwsLCmDNnDo6OjpaOIynI8ePHadGiBTdv3lQhQEQkCVIhQEREJIlzdHTE3d3d0jFEREQkidBkgSIiIiIiIiIpiAoBIiIiIiIiIimICgEiIiIiIiIiKYgKASIiImJRJpOJu3fvWmz/Q4cOxd7eHisrK5YtWxarrW3btri4uODm5kbp0qXZsGFDnPU3btyItbU148aNe+Y+9uzZg6urK3Z2dvj6+nLp0qUEfhUiIiIvT4UAERERSdLu3r1LdHT0a69fuXJl/vjjDz788MM4bWPHjuXw4cMEBQUxefJkGjZsSExMjLn93r17fPvtt9SoUeOZ24+JiaF58+aMGzeOv/76ixo1auDv7//aeUVERN6UCgEiIiKSKBiGQc+ePalTpw5hYWHPXTY8PJxFixZRr1493NzciIiIeO39lilThsKFC8fbliVLFvO/7927F6e9S5cu9O3bl+zZsz9z+wcOHCBVqlT4+PgA0LFjR1asWPFGmUVERN6ECgEiIiJicY8ePaJp06Y8ePCApUuXkj59+jjLREdHs3btWlq3bo2TkxPr1q3jiy++4J9//iFDhgwAjBw5Ejc3t3j/li5d+lrZvv32W4oUKUL9+vX57bffsLJ6cvi0ePFirKysqFOnznPXP3/+PAULFjQ/zpgxI5kyZeLy5cuvlUdERORNpbJ0ABEREZGaNWvi5+dHv379nrmMh4cHV69eJSAggClTppA6deo4y3Tv3p3u3bsnaLbhw4czfPhw1q9fT48ePdixYwe3b99m8ODBbN68OUH3JSIi8i5oRICIiIhYnK+vL+vWreP+/fvPXGbKlCk0adKE3r1707x5c5YuXcqjR49iLfM2RgT8q3LlyoSGhnLkyBEOHDjAlStXcHNzo1ChQixevJhBgwbRp0+fOOsVKFCAc+fOmR+HhoZy79498ubN+0Z5REREXpdGBIiIiIjF9e7dmxkzZpgn7ovvmvtSpUp
RqlQpxowZw8aNG5k3bx7+/v5UrFiRyZMnY2Njk6AjAiIjIzl37hxFixYFYO/evVy/fp3ChQvj4eHBtWvXzMu2adMGNze3eCcB9PDwIDIykk2bNuHj40NgYCC1a9cmbdq0CZJTRETkVakQICIiIomCv78/GTJkwNfXl7Vr15I7d+54l7OysqJy5cpUrlyZR48esWrVKgzDeO39Dh48mEmTJnHjxg2OHj1Kly5dOHToEBkyZKB169bcu3ePVKlSkSFDBhYvXkzWrFlfuM1JkyZx+fJlBg0ahJWVFXPmzKFjx45ERESQN29eZs+e/dp5RURE3pTJeJOeU0RERCzm4MGDeHh4cODAAdzd3S0dR1IQffZERJI2zREgIiIiIiIikoKoECAiIiIiIiKSgqgQICIiIolKjRo1OHny5AuX69+/P3Pnzk2w/e7ZswdXV1fs7Ozw9fXl0qVL8S43Y8YMMmfObL4bgY+PT6z2LVu2ULp0aYoXL46TkxO7du0CYOnSpbi4uODm5oaTkxN9+vR5o7kNREREXpcmCxQREZFEZfXq1S+13KBBgxJsnzExMTRv3pxffvkFHx8fRo0ahb+/P4sWLYp3eR8fH5YtWxbn+cuXL9O6dWv++OMPHB0defToEeHh4cCT2w/6+flhZWXF48ePqVChAqVKlaJevXoJ9jpERERehkYEiIiIyDu3fPlyHB0dcXV1pWfPntja2nL27FkAChUqRFBQEAAVK1bkm2++wdvbmyJFivC///3PvI02bdowbty4BMlz4MABUqVKZT6737FjR1asWEFERMQrbWfChAk0a9YMR0dHAGxsbMiSJQsAGTNmxMrqyaFXREQEjx49wmQyJUh+ERGRV6FCgIiIiLxT169fp127dixdupTg4GAcHBy4devWM5f/+++/2bRpE0ePHmXt2rXmofbP07hxY/PQ/f/+XbhwIc7y58+fp2DBgubHGTNmJFOmTFy+fDne7W/fvh03Nze8vLxijRoICQkhPDycypUr4+bmxhdffMHDhw/N7Tt37qREiRLkzJkTX19f/Pz8XvhaREREEpoKASIiIvJO7d69GxcXFxwcHABo3bo1adKkeebyjRs3JlWqVKRLlw43Nzf+/vvvF+5j4cKFBAUFxfv3/vvvv1H+WrVqcf78eYKCgpg6dSrdunVj9+7dAERFRbF161YWLVrEvn37uHPnDgMGDDCv6+XlxZEjR7hw4QIHDhxg27Ztb5RFRETkdWiOABEREUnU0qZNa/63tbU1UVFRL1yncePGz5xwcMWKFXGKAQUKFODcuXPmx6Ghody7d4+8efPGWd/W1tb8b0dHR2rUqMGOHTvw9PSkQIECuLm5kTVrVgCaNm3KsGHD4mwjR44c1KhRg0WLFvHhhx++8PWIiIgkJBUCRERE5J3y9PTk8OHDnDx5Ent7e+bMmcPjx48TdB8LFy58peU9PDyIjIxk06ZN+Pj4EBgYSO3atWMVIf516dIl8uXLB8C1a9fYuHEjjRs3BqBZs2b07NmTR48eYWNjwx9//IGrqysAJ06cwM7ODisrK0JDQ1m1ahWtWrV6w1cqIiLy6lQIEBERkXcqZ86cTJkyhbp162JjY0OVKlV47733zJPqWYKVlRVz5syhY8eOREREkDdvXmbPnm1ur1GjBoMGDaJUqVL8/PPPLF++nNSpUxMTE0PXrl3x9fUFngz9r1OnDiVLlsTa2prixYszadIk4ElxYuHChaROnZro6Gg++eQTPv30U4u8XhERSdlMhm5gKyIikiQdPHgQDw8PDhw4gLu7u6XjvJLQ0FAyZswIwLJly+jVqxfHjx+3cCp5WUn5syciIhoRICIiIhYwfvx4Fi5cSHR0NJkyZWLu3LmWjiQiIpJiqBAgIiIi71zv3r3p3bu3pWOIiIikSLp9oIiIiIiIiEgKokKAiIiIJDkDBw7E39/fYvs/e/YsFStWJHPmzLi5ucVq27hxI2XKlMHJyYnixYvTo0cPYmJiADh
y5AgffvghDg4OODs7065dO8LDw83rzp49G1dXV5ydnalUqRLnz583txUqVAh7e3vc3Nxwc3N75TsjiIiI/EuFABEREZFXlClTJgYPHsy8efPitGXNmpUFCxYQEhLCgQMH2LlzJ7NmzQIgbdq0/PTTT5w4cYLg4GAePnzIDz/8ADy5vWD37t1Zs2YNR48epW3btnTq1CnWthcuXEhQUBBBQUHmWxaKiIi8KhUCRERE5LWEh4fTuHFjnJyccHV1pWrVqgBcvXoVHx8fPDw8KF68OF26dDGfEZ8xYwaVK1emadOmODk54eXlRUhICPXq1cPR0ZGqVavy4MED4MlZ/wYNGuDr64uDgwO1a9fm1q1b8WYZNWoUZcqUwd3dnerVq3Pu3DkAVqxYgYuLC25ubjg7O7N8+fIEee3ZsmWjQoUKZMiQIU5byZIlKVy4MPDkh7+bmxtnz54FoFixYri4uABgbW1N6dKlzW1Hjx7FxcWFPHnyAE9uWfjHH3888zWLiIi8LhUCRERE5LWsWbOGu3fvEhISQnBwMAsWLAAgS5YsrFixggMHDnD48GHOnj3Lr7/+al5v3759/PDDD4SEhFCkSBFq167NpEmTOH78OGnSpGHmzJnmZbdt28a8efM4ceIE77//Pr169YqTY968eZw8eZJdu3Zx8OBBmjdvTufOnQHo27cvgYGBBAUFcfjwYT766KM464eGhpqH2//3r1q1am/0Hl29epXFixdTq1atOG0PHz5kypQp+Pn5AeDq6srBgwf566+/AJgzZw6GYZiLGgCtWrWiRIkStG/fnhs3brxRNhERSbl01wARERF5La6urhw/fpzOnTvz0UcfUaNGDQBiYmLo2bMn27dvxzAMrl+/jrOzM02aNAGgXLlyFChQAIBSpUoRGRlJrly5AChdujSnTp0y76NmzZrkzp0bgA4dOlC/fv04OZYtW8a+ffvw8PAAIDo62txWqVIlvvrqKz755BOqVq0a53p+gIwZMxIUFPTmb8h/3L9/n9q1a9OjRw9KlSoVq+3x48c0btyYqlWrUq9ePeDJaIFJkybRqlUroqKiqFmzJlmyZCFVqieHa1u3bqVAgQJERkbSt29fWrduzerVqxM8t4iIJH8qBIiIiMhrKVy4MCEhIWzcuJH169fTo0cPgoKC+Pnnn7l+/Tp79uwhbdq0dOvWjYiICPN6adOmNf/b2to6zuOoqKhn7tNkMsV5zjAMevXqRYcOHeK0jRkzhmPHjrFp0yZat25N8+bN6dGjR6xlQkND8fb2jnd/uXLlYu3atc9+E54hNDSU6tWr4+fnR7du3WK1RUZG0rhxY/LkyUNAQECstk8++YRPPvkEeDKa4IcffqBo0aIA5uJJ6tSp8ff3x87O7pVziYiIgAoBIiIi8pouXrxI1qxZqVOnDtWrV2fZsmVcuHCBO3fukDt3btKmTcvVq1dZtGgRDRo0eK19rF69mmvXrpErVy6mTJlC5cqV4yxTt25dRo8ezSeffEK2bNmIjIzk6NGjlCxZkhMnTlC8eHGKFy9OqlSp+PPPP+Osn9AjAh48eED16tWpXr06ffv2jdUWFRVFkyZNyJYtG5MnT45T2Lhy5Qp58uQhOjqanj178vnnn5M+fXoePnxIZGQkWbJkAWD+/PmULFkywTKLiEjKokKAiIiIvJYjR47Qq1cvDMMgKiqKli1b4uLiYh6KX7x4cfLmzRvvj/eX5e3tTbNmzbh06RLFihVjxowZcZZp3rw5t27dwsfHB3jyY7tdu3aULFmS3r17c/LkSdKkSUP69OmZOHHia2d5WlhYGHZ2djx69Ih79+6RP39+WrZsybBhwwgICGDv3r08fPiQJUuWANCwYUP69OnDwoULWbJkCS4uLuYf8uXLl+fnn38GoF27dpw7d45Hjx5Rs2ZNhg4dCsC1a9do0KAB0dHRGIZB4cKFzXciEBEReVUmwzAMS4cQERGRV3fw4EE8PDw4cOAA7u7ulo6T4AYOHMj
du3cZN26cpaPIfyT3z56ISHKnuwaIiIiIiIiIpCC6NEBEREQSpYEDB1o6goiISLKkEQEiIiIiIiIiKYgKASIiIiIiIiIpiAoBIiIiYjZw4ED8/f0tHQOTyUSJEiVYvXo1AA8fPqRt27aUKFECBwcHvv32W/6d73jz5s2kS5cONzc38194eDgAGzdupEyZMjg5OVG8eHF69OhBTEzMS2U4f/48tWvXxt7eHicnJ8aPHw/AmTNn8PDwwM3NDWdnZxo2bMidO3fM661cuRIHBweKFStG/fr1uX//vrnthx9+wMnJCTc3Nzw9Pdm7d+8rvR//vr5t27aZ2/bs2YOrqyt2dnb4+vpy6dIl83tWtmxZXF1dcXV1pXr16pw9e/aF2wwPD8fNzY333nuPZcuWvVQ+ERFJWlQIEBERkURp27Zt1KhRA4ChQ4cSHR3N4cOHOXLkCMHBwSxevNi8rL29PUFBQea/dOnSAZA1a1YWLFhASEgIBw4cYOfOnS912z3DMKhXrx6tWrXi5MmThISE0KhRIwDy5s3L9u3bCQoK4ujRo+TNm9c8n8GDBw9o3749y5Yt49SpU+TNm5fvv/8egKCgICZMmMDevXsJCgqiS5cudOnS5ZXej39fn7e3NwAxMTE0b96ccePG8ddff1GjRg1zISddunSsX7+e4OBggoODqVatGl999dULt5kuXTqCgoIoVarUS2cTEZGkRYUAERGRZGjIkCGxfmQ+ePCAbNmycePGDY4cOUKFChVwd3fHycmJwYMHx7uNGTNmULduXfPjlStXUrFiRfPj2bNnU7ZsWdzd3fnwww8JDg5+Wy+H4OBgqlevjslkInXq1FSpUoXZs2e/cL2SJUtSuHBhANKmTYubm1uss+LPsmHDBmxsbGjYsKH5uVy5cgFgY2NjLjRER0fz8OFDTCYTAH/88QclS5bEwcEBgM6dOzN//nzgyRn4yMhIHj58CMDdu3fJnz//S74D8Ttw4ACpUqXCx8cHgI4dO7JixQoiIiKwsrIiY8aMwJPCxv379805RUQkZdNdA0RERJKhVq1a4eHhwejRo7GxsWHRokX4+PiQI0cO0qZNa/6hGx4ejpeXF5UrV8bT0/Olt79jxw7mz5/P1q1bsbGxYdu2bTRr1oxjx47FWbZr165s2rQp3u0EBgZStmzZF+7Pw8ODRYsW0aBBAyIjI1m2bBl37941t//999+4u7tjbW1N27Zt6dy5c5xtXL16lcWLF7Ny5coX7i8kJIQcOXLQpEkTTp48SaFChRg9erS5qPD48WPKlCnDuXPncHFx4ffffweeXE5QsGBB83YKFSrElStXiIqKwtXVla5du/LBBx+QLVs2bGxs2Lp16wuz/KtSpUpERUVRqVIlvv/+ezJkyBBnfxkzZiRTpkxcvnzZnLVy5cocOXKEHDlysHbt2hduU0REkj+NCBAREUmG3n//fUqWLGn+gTpjxgzatm0LPLkG/NNPP6VEiRJ4enpy7tw5goKCXmn7y5cvJzg4mLJly+Lm5sYXX3zB7du3zdfmP23s2LGxhu0//fcyRQCAb7/9lgIFClC2bFlq1qxJmTJlSJXqyfkMd3d3Ll68yMGDB1m6dCmTJk3i119/jbX+/fv3qV27Nj169HipIe9RUVFs3LiRfv36cejQIapVq2a+NAAgTZo0BAUFce3aNRwcHAgMDHzhNs+cOcOSJUs4ffo0Fy9epGvXrjRu3PilXv+5c+fMlzbcuHGD7t27v9R6AOvXr+fKlSs0btyYIUOGJMg2RUQkaVMhQEREJJlq164d06dP559//uH06dNUr14dgN69e2Nra8uhQ4cIDg6mYsWKRERExFk/VapUREdHmx8/vYxhGLRu3TrWj/orV66Yh8w/rWvXrrEm8nv6b8+ePS/1WtKlS0dAQABBQUFs2bIFW1tbihcvDkCmTJnInDkzAPnz56dp06axJtMLDQ2levXq+Pn50a1bt5faX4ECBShZsqR5Hy1btuTgwYN
ERkbGWi5NmjS0bdvWfJlCgQIFOHfunLn97Nmz5MmTh1SpUvHbb79RokQJ8ubNC0Dbtm3ZsWMHjx8/fqk8ABkyZKBz587m1/ff/YWGhnLv3j3zPv5lZWXFZ599FutyimdtU0REkj8VAkRERJKpunXrsm/fPoYNG0aLFi3MZ9Dv3LlD/vz5SZUqFSdPnmTdunXxrl+0aFEOHz5MeHg4UVFRzJs3z9xWp04d5syZw/nz54Enk9bt378/3u0kxIiA+/fvExYWBjw5sz5x4kS+/vprAK5cuWK+E0BoaCgrV66kZMmSwJO5EapXr0716tXp27dvnO06ODiYZ9l/2scff8zFixfNbatXr8bR0ZHUqVNz7tw5c5aYmBgWLVqEi4sLANWrV+fgwYOcOHECgAkTJtCkSRMAChcuzI4dO3jw4AHwZM4FOzs70qRJAzwZph/fXQTu3LkTa38LFy40vz4PDw8iIyPNl14EBgZSu3Zt0qZNy9WrV2PdzWDhwoXmnM/bpoiIJH+aI0BERCSZsrGxoVGjRkyYMIHjx4+bn+/bty8tW7Zk5syZFClSBF9f33jX9/T0pEaNGjg7O5MnTx7Kly9vPoPv7e3NiBEjqFevHlFRUTx+/JiaNWu+tZnm//nnHxo1akSqVKlIlSoVY8eOxc3NDYDffvuNiRMnkipVKqKiomjYsKH5MoiAgAD27t3Lw4cPWbJkCQANGzakT58+XL9+nVu3bpEtW7Y4+8uQIQOTJk2iZs2aGIZB5syZWbBgAQCHDx+mT58+wJMf0e7u7vz444/Ak2v0p0yZQt26dYmKisLZ2ZmZM2cCUK9ePfbt20epUqWwsbEhQ4YM5uJKdHQ0wcHB8U4eeOLECTp27IjJZCIqKgp3d3cCAgKAJ2f658yZQ8eOHYmIiCBv3rzms/7nz5+nY8eOREdHYxgGRYoUYc6cOS/cpoiIJH8m49+b8IqIiEiScvDgQTw8PDhw4ADu7u6WjpOgTCYTd+7cIUuWLG9tH4sWLeLkyZPxjhR41/bt20dgYCBTpkyxdBSzihUr4u/vH+vOEf9Kzp89EZGUQCMCREREJNHJlSsXH330EcOGDaNGjRpvZR9P3xrQ0kqXLk3p0qUtHQN4MplkuXLluH37NmnTprV0HBEReQs0R4CIiIi8kkKFCr3yXQZe1dWrVwkODn6lIoDJZIp1S8F/zZgxg8yZM+Pm5oazszM+Pj789ddfr5zpwYMHmEwm82M3NzdCQ0NfuE61atWwtbWNM7rheW2bN282X/rwX/v37zffbeDu3bsMHz78lV/L86RLl46goCDOnz9vnmBSRESSFxUCREREJNnz8fEhKCiIo0eP4uHhgb+//xtvMygoiIwZMz53mdSpU9OzZ0/Wr1//Sm3PU6pUKRYuXAi8nUKAiIgkfyoEiIiISLx27dpFhQoVcHV1xcXFheXLl8dZZsyYMZQuXRo3NzdKly7Nrl27gCeT6HXp0gVHR0dcXV3x8PAgIiKCGzduULVqVUqUKIGLi4t5Ur+EZBgGPXv2pE6dOuaZ8Z9WqVKlWLfca968OaVKlcLFxYWaNWty9epVc1tgYCDFihWjZMmSjB07NtZ2/h2BcOPGDQoVKsTu3bsBWLx4Ma6uroSHh2NjY4Ovr2+8cx08rw0gKiqKVq1a4ezsjIeHh3kUxtOjBf73v/8RGhqKm5ubeaLGwYMH4+joaL5F49OvVUREBDRHgIiIiMTj9u3b1K1bl8WLF+Pt7U1MTEy8w+5btmxJt27dANi9ezdt2rThxIkTBAcHs2HDBo4dO4aVlRX37t0jTZo0zJkzhw8++IA///zTvJ/4dO3a1XxLvP8KDAx85m0HHz16RNOmTcmePTtLly7F2to6VntMTAxLly4139IPYNy4ceTIkQOA4cOHM3DgQCZNmsTRo0cZMGAAhw4dIk+ePPTu3TvefebIkYPZs2fTvHlz5s+fj7+/Pxs3biRdunTxLv+yjh07RkBAALNmzeLXX3+lSZM
mse7+ADBp0iTc3NzMRYI7d+4watQorly5Qrp06QgLC8PKSud9REQkNhUCREREJI5du3Zhb2+Pt7c38OQ2dfHdZu/QoUMMGTKEW7dukSpVKk6ePEl4eDiFCxcmKiqKdu3a4ePjQ82aNbGyssLT05OxY8fy9ddf8+GHHz7zGvT/nn1/WTVr1sTPz49+/frFen7Tpk24ublx/vx5smXLZr4NIsC8efOYPXs2ERERREREYGtrC8DGjRv5+OOPyZMnDwCdOnVi2LBh8e7X29ub9u3b4+XlxaxZs7Czs3ut/E8rVKgQlSpVAqBRo0Z06NCBCxcuPHedTJkyUaxYMVq0aEHVqlWpWbNmvLckFBGRlE0lYhEREXktjx8/pn79+owaNYqjR4+ydetW4MlZ+cyZM3P06FGaNWvGiRMncHFx4fTp05QrV46goCDKli3LkiVLKF26NNHR0XG23bVrV/PQ9v/+Pf0j/r98fX1Zt24d9+/fj/X8v3MEXLx4ETs7Ozp37gzA9u3b+fHHH1m9ejVHjx5lzJgxRERExLvtpycKjM+hQ4fIkSPHC3+svy6TyfTCDNbW1uzevRt/f3+uX7+Op6cn27Zteyt5REQk6dKIABEREYnDy8uLU6dOsW3btliXBjw9KiAiIoLHjx9ToEABAMaPH29uu3HjBtbW1lStWpUqVaqwZcsWQkJCsLa2Jl++fDRq1Ijq1auTM2dOHjx4QObMmWPt/3VHBPTu3ZsZM2ZQuXJl/vjjD7Jnzx6rPX369EyZMgV7e3sOHTrEnTt3yJgxI9mzZ+fx48cEBgaal/X19WXYsGFcvXqV3LlzM2nSpGfu96effuLOnTsEBwfj6elJhQoVKF++/Gu9hn+dPXuWTZs24ePjw+LFi8mVKxf58+fn77//Ni+TKVMmwsPDefz4MWnSpCE0NJTQ0FC8vb3x9vbm2LFjHDp0yDyyQ0REBDQiQEREROKRNWtWli5dyrfffouLiwvu7u7s2LEj1jKZMmVi8ODBlClTBg8PD9KkSWNuu3DhAlWqVMHFxQVnZ2ecnZ35+OOP2bx5Mx4eHri5ueHl5cXIkSPjFAHelL+/P5999hm+vr6xJv77V968efnmm2/o378/1atXx97e3nwZxNO37HN2dmbgwIF4e3tTsmRJbGxs4t3fwYMHGTVqFHPnziVnzpzMmTOHli1bcuvWLQBcXFwoV64c9+/fJ3/+/LRs2dK87vPaihcvzowZMyhRogTDhg1j/vz5cUYEZMuWjVatWuHi4kKpUqW4d+8e9evXN0/GGBkZSevWrd/k7RQRkWTIZBiGYekQIiIi8uoOHjyIh4cHBw4cwN3d3dJxJAXRZ09EJGnTiAARERERERGRFESFABEREREREZEURIUAERERERERkRREhQARERERERGRFESFABEREREREZEURIUAERERERERkRQklaUDiIiIyJs5fvy4pSNICqPPnIhI0qZCgIiISBJla2tL+vTpadGihaWjSAqUPn16bG1tLR1DREReg8kwDMPSIUREROT1nD9/nps3bz53mevXr/PLL7+wfPlysmbNSocOHfDz8yNVKp0PSOliYmJYv349EyZM4MKFC1SpUoVOnTpRsGDBF65ra2tLgQIF3kFKERFJaCoEiIiIJFM3b95k+PDh/Pzzz6RPn55evXrx+eefky5dOktHk0QmKiqKmTNn8t1333H58mXatm1L//79ef/99y0dTURE3gIVAkRERJKZ+/fvM3bsWEaPHo1hGHz99dd069aNTJkyWTqaJHIREREEBgYyZMgQ7t27R+fOnenVqxc5c+a0dDQREUlAKgSIiIgkE+Hh4UyYMIFhw4bx4MEDunTpwrfffqvruOWVhYaGEhAQwMiRI4mOjqZr1658/fXXZMmSxdLRREQkAagQICIiksRFRkYybdo0Bg0axLVr12jfvj39+vUjf/78lo4mSdzt27cZMWIEP/74I2nTpqVnz5588cUXpE+f3tLRRETkDag
QICIikkTFxMSwYMEC+vfvzz///EPTpk357rvvKFq0qKWjSTJz5coVhgwZwuTJk8mePTv9+vXj008/JU2aNJaOJiIir8HK0gFERETk1RiGwe+//46bmxvNmzenePHiBAUFMXfuXBUB5K3IkycPP/30EydPnqRq1ap06dIFe3t7Zs2aRXR0tKXjiYjIK1IhQEREJAnZtGkTXl5e+Pn5kT17dnbu3Mny5ctxcXGxdDRJAT744ANmzpzJkSNHcHd3p3Xr1ri4uLBkyRI0yFREJOlQIUBERCQJ2Lt3L1WqVMHX15fo6GjWrVvHxo0bKVeunKWjSQpUvHhxfvvtN/bu3Uv+/Plp0KABZcqUYd26dSoIiIgkASoEiIiIJGJHjx6lXr16lC1blitXrrB06VL27NlD5cqVMZlMlo4nKVzp0qVZu3YtmzZtInXq1FStWhVfX1927txp6WgiIvIcKgSIiIgkQv/88w8tW7bExcWF4OBgZs2aRXBwMHXr1lUBQBKdihUrsmPHDlasWMHt27cpX748tWvXJjg42NLRREQkHioEiIiIJCKXL1+mc+fO2Nvbs2HDBn7++WdOnDhBy5Ytsba2tnQ8kWcymUzUqlWLQ4cOMX/+fE6cOIGbmxvNmjXj1KlTlo4nIiJPUSFAREQkEbh16xY9evSgSJEiLFy4kKFDh3L69Gk6deqkW7RJkmJlZUWTJk0ICQlh8uTJbN26FUdHRzp06MCFCxcsHU9ERACToRldRERELCY0NJSxY8cyevRoYmJi6NatG926dSNz5syWjiaSICIiIpg4cSJDhw4lNDSUzp0706tXL3LkyGHpaCIiKZYKASIiIhbw3x9HnTp1onfv3vpxJMlWaGgo48aNY9SoUcTExNC1a1e+/vprFb1ERCxAhQAREZF3KDIykhkzZjBo0CCuXLlCu3bt6NevH++//76lo4m8E7du3eKHH35g/PjxpE+fnm+//ZbPP/+c9OnTWzqaiEiKoTkCRERE3oGYmBjmz5+Pk5MTHTp0wNvbm+PHjzN58mQVASRFyZ49OyNGjODvv/+mcePG9O7dm6JFizJx4kQeP35s6XgiIimCCgEiIiJvkWEYrFy5kpIlS9KsWTMcHBwICgpi3rx5FCtWzNLxRCwmb968TJgwgZMnT1K5cmU+//xzHB0dmTNnDtHR0ZaOJyKSrKkQICIi8pZs3rzZfD/1rFmzmu+z7urqauloIolG4cKFmTVrFocPH8bV1ZWWLVvi6urKsmXL0BWsIiJvhwoBIiIiCWzfvn1UrVoVHx8fIiMj+fPPP9m0aRNeXl6WjiaSaDk7O7NkyRL27NlDnjx5qFevHmXLlmX9+vWWjiYikuyoECAiIpJAQkJCqF+/PmXKlOHSpUssWbKEvXv3UqVKFUwmk6XjiSQJZcqUYd26dWzYsAFra2uqVKlCpUqV2L17t6WjiYgkGyoEiIiIvKEzZ87QunVrnJ2dOXToEDNnzuTw4cPUq1dPBQCR1+Tr68vOnTv5/fffuXHjBuXKlcPPz48jR45YOpqISJKnQoCIiMhrunLlCp9//jn29vb8+eef/PTTT5w8eZJWrVphbW1t6XgiSZ7JZKJ27doEBQUxd+5cjh07hqurK82bN+f06dOWjicikmSZDM3CIiIi8kpu375tvg962rRp+fbbb+nSpYvugy7ylkVGRjJ9+nQGDRrE1atXad++Pf379ydfvnyWjiYikqSoECAiIvKSQkNDCQgIYOTIkURHR9O1a1e+/vprsmTJYuloIilKeHg4EydOZOjQoTx48IAuXbrw7bffYmtra+loIiJJggoBIiIiLxAREcGkSZMYOnQo9+7do3PnzvTq1YucOXNaOppIinb//n3Gjh3L6NGjMQyDr7/+mm7dupEpUyZLRxMRSdRUCBAREXmGqKgoZs6cyXfffcfly5dp27Yt/fr1o0CBApaOJiJPuXnzJj/88AM//fQTGTJk4Ntvv+Xzzz8nXbp0lo4mIpIoabJ
AERGR/4iJiWHhwoU4OTnx6aef4uXlxbFjx/jll19UBBBJhGxtbRk5ciSnT5+mYcOG9OrVi6JFixIYGEhkZKSl44mIJDoqBIiIiPx/hmGwatUq3N3dadKkCXZ2dhw6dIgFCxZgb29v6Xgi8gL58uVj4sSJnDhxAl9fXzp16oSDgwNz584lOjra0vFERBINFQJERESALVu2UKFCBWrVqkXmzJnZvn07K1euxM3NzdLRROQVFSlShNmzZxMcHEyJEiVo0aIFbm5uLF++HF0VKyKiQoCIiKRwBw4coHr16lSsWJFHjx6xdu1aNm/eTPny5S0dTUTeUIkSJVi2bBm7d+8mV65c1K1bl3LlyrFx40ZLRxMRsSgVAkREJEU6fvw4n3zyCaVKleLcuXMsXryYffv2UbVqVUwmk6XjiUgCKlu2LOvXr2f9+vUYhkGlSpWoXLkye/bssXQ0ERGLUCFARERSlLNnz9KmTRucnZ3Zv38/M2bM4OjRozRo0EAFAJFkrlKlSuzevZtly5Zx7do1PD09qVu3LkeOHLF0NBGRd0qFABERSRGuXr1Kly5dsLOzY82aNfz444+cPHmS1q1bY21tbel4IvKOmEwm/Pz8CAoKYs6cORw5cgRXV1datGjB33//bel4IiLvhMnQjCkiIpKM3b59m5EjRxIQEICNjQ09e/bkiy++IEOGDJaOJiKJQGRkJNOmTWPQoEFcv36dTz/9lL59+5IvXz5LRxMReWtUCBARkWTpwYMHBAQEMHLkSKKiovD39+ebb74hS5Yslo4mIolQeHg4P//8M8OGDSMsLIwuXbrQs2dPbG1tLR1NRCTBqRAgIiLJyqNHjwgMDGTIkCHcvXuXTp060atXL3LlymXpaCKSBNy/f58xY8YwevRoTCYTX3/9Nd26dSNjxoyWjiYikmBUCBARkWQhKiqKWbNmMXDgQC5dukSbNm3o378/BQsWtHQ0EUmCbt68yfDhw/npp5/ImDEjvXr1olOnTqRLl87S0URE3pgmCxQRkSQtJiaGX3/9leLFi9O+fXvKlStHSEgIU6dOVRFARF6bra0to0aN4vTp0zRo0IAePXpQrFgxJk+eTGRkpKXjiYi8ERUCREQkSTIMg9WrV+Ph4UHjxo0pWrQoBw8eZOHChdjb21s6nogkE/nz52fSpEmcOHGCjz76iP/97384Ojoyb948YmJiLB1PROS1qBAgIiJJzrZt2/jwww+pWbMmGTNmZNu2baxatYqSJUtaOpqIJFNFixZl7ty5BAUFUbx4cZo3b46bmxsrVqxAV9qKSFKjQoCIiCQZBw8e5OOPP+bDDz/k4cOH/PHHH2zZsoUKFSpYOpqIpBAuLi4sX76cXbt2YWtrS506dfDy8mLTpk2WjiYi8tJUCBARkUTvxIkTNGzYEA8PD86ePcuiRYvYv38/1atXx2QyWTqeiKRAnp6ebNiwgXXr1hEdHY2vry9VqlRh7969lo4mIvJCKgSIiEiide7cOdq1a0fx4sXZu3cv06dP58iRI3zyySdYWakLExHLMplMVK5cmT179rB06VKuXLlC2bJlqV+/PseOHbN0PBGRZ9JRlIiIJDrXrl3jyy+/pFixYqxevZqAgAD++usv2rRpQ6pUqSwdT0QkFpPJRN26dQkODmb27NkEBQVRokQJWrVqxT///GPpeCIicZgMzW4iIiKJxJ07dxg1ahTjxo0jTZo09OjRgy+//JIMGTJYOpqIyEt7/PgxU6dO5fvvv+fGjRt89tln9O3bl7x581o6mogIoEKAiIgkAg8fPuTHH39kxIgRPH78GH9/f7755huyZs1q6WgiIq8tLCyMn3/+meHDhxMWFsYXX3xBz549yZ49u6WjiUgKp0KAiIhYzKNHj5g8eTJDhgzhzp07/O9//6N3797kypXL0tFERBLMvXv3GDNmDGPGjMHKyopvvvkGf39/MmbMaOloIpJCqRAgIiLvXFRUFLNnz2bgwIFcvHiR1q1bM2DAAAo
WLGjpaCIib82NGzcYPnw4P//8MxkzZqR379506tSJtGnTWjqaiKQwmixQRETemZiYGBYvXkyJEiVo164dZcuW5dixY0ybNk1FABFJ9nLkyMHo0aM5deoU9erVo3v37hQrVoxffvmFyMhIS8cTkRREhQAREXnrDMNgzZo1lC5dmoYNG/LBBx9w4MABfv31VxwcHCwdT0TknXr//feZPHkyx48fx9vbmw4dOlC8eHEWLFhATEyMpeOJSAqgQoCIiLxV27dv56OPPuLjjz8mffr0bN26ldWrV+Pu7m7paCIiFlWsWDHmzZtHUFAQ9vb2NG3alJIlS7Jy5Up09a6IvE0qBIiIyFtx6NAhatSogbe3Nw8ePGD16tVs3boVb29vS0cTEUlUXF1dWbFiBTt27CBr1qzUrl2b8uXLs3nzZktHE5FkSoUAERFJUCdPnqRx48a4u7vzzz//8Ouvv7J//34+/vhjTCaTpeOJiCRaXl5ebNq0iT///JPIyEh8fHyoVq0a+/fvt3Q0EUlmVAgQEZEEcf78edq3b4+TkxO7d+9m2rRpHD16lIYNG2Jlpe5GRORlmEwmqlSpwt69e1myZAkXL16kdOnSNGjQgJCQEEvHE5FkQkdmIiLyRq5fv46/vz/FihVj5cqVjB07lr/++ou2bduSKlUqS8cTEUmSTCYT9erV4/Dhw8ycOZODBw9SokQJWrduzZkzZywdT0SSOBUCRETkpdy/f5/SpUuzcuVKAO7evUvfvn0pXLgwM2bMYMCAAfz99998+eWX2NjYWDitiEjyYG1tTatWrTh58iTjx4/nzz//xN7eni5dunDlyhXgyZwsTk5OXLhwwcJpRSSpMBmaklRERF5CixYt+P3339m1axcrVqzghx9+4PHjx3z11Vd0796drFmzWjqiiEiyFxYWxk8//cTw4cOJiIjgyy+/pEOHDnz00UcUKVKEDRs2YG1tbemYIpLIqRAgIiIvNGfOHFq2bEnr1q1Zu3Ytt27domPHjvTp04fcuXNbOp6ISIpz9+5dxowZw5gxY7C2tqZhw4ZMnz6dQYMG0adPH0vHE5FEToUAERF5rr/++gtXV1esra0JCwujefPm9OrVCycnJ0tHExFJ8c6ePcvo0aMJDAwkderUREREsGHDBipWrGjpaCKSiKkQICIiz1W4cOF4J6aaMmUK7du3t0AiEREBWLduHdWqVeO/h/OZMmXi3r17FkolIkmBpnMWEZHnGjJkCIcPH+aDDz4gderUpE6dmjRp0lC9enVLRxMRSdE+/PBDFi9eTHh4OJGRkURGRnLx4kVsbW0tHU1EEjmNCBBJYs6fP8/NmzctHUPktdna2lKgQAFLxxARSfJ0TCBvi/rq5E8jAkSSkPPnz+Po6EhYWJilo4i8tvTp03P8+HEdYIiIvAEdE8jbpL46+VMhQCQJuXnzJmFhYcyZMwdHR0dLxxF5ZcePH6dFixbcvHlTBxciIm9AxwTytqivThlUCBBJghwdHXF3d7d0DBEREbEwHROIyOuwsnQAEREREREREXl3VAgQERERERERSUFUCBARERERERFJQVQIEJFE4+zZs2TJksXSMQBYtWoVHh4e2NjY4O/vH6vt559/pkSJEri5ueHs7MyPP/5obvv8889xc3Mz/6VNm9bcPmTIkFhtmTJlolu3buZ1jxw5QsWKFXF0dMTR0ZElS5bEm61Nmzbky5fPvJ3u3bsn/BsgIiLyFphMJu7evWux/bdt2xY7OztcXV0pX748+/btM7e9qH+dMGECjo6OlChRAldXVyIiIuLdh8lkMh8nuLm5sW3btrf6mkRehyYLFJFkKTIykvDwcDJlyvRa6xcrVoxp06axaNEiHjx4EKutRYsWfP755wDcv38fZ2dnvL29KVmyJD///LN5uatXr/LBBx/QqFEjAPr06UOfPn0AePToEXnz5qV58+YAhIWF4efnx6xZs6hQoQLR0dHcvn37mfm6d+8ep0AhIiKS3N29e5e
MGTNibW39WuvXq1ePX375hVSpUrFy5UoaNmzI2bNnze3P6l+XL1/O3Llz2b17N5kzZ+bGjRukTp36mfvZtm1bojm5IRIfjQgQkbdm165dVKhQAVdXV1xcXFi+fDkA+/fvx8vLCxcXF8qUKcOOHTvirBseHk7jxo1xcnLC1dWVqlWrvnB/hmGwZcsWOnbsSJEiRQgODn7t7P+eLUiVKm69NHPmzOZ/P3z4kMjIyHi3MXPmTKpVq0bu3LnjtC1btoz3338fDw8PAObNm4enpycVKlQAwNramhw5crx2fhERkcTMMAx69uxJnTp1CAsLe+6y4eHhLFq0iHr16uHm5vbMM/Evo06dOua+3dPTk0uXLhEVFfXC9UaOHMmAAQPMxwA5cuR47WKESGKgQoCIvBW3b9+mbt26DBs2jODgYIKCgvD29ubx48fUr1+fAQMGcPjwYcaMGUODBg3inHVfs2YNd+/eJSQkhODgYBYsWPDMfR06dIju3btTrFgxxo4di4+PDydOnMDb2xuATZs2xRqS//Tfv2foX9XixYspXrw4hQoV4ptvvqFkyZJxlpk2bRrt27ePd/2pU6fGagsJCcHGxoZatWrh5uZGq1atuHHjxjP3HxAQgIuLC7Vq1SIoKOi1XoOIiIglPHr0iKZNm/LgwQOWLl1K+vTp4ywTHR3N2rVrad26NU5OTqxbt44vvviCf/75hwwZMgBPfpw/q39funTpC3MEBARQo0aNWEX/Z/WvISEh7N+/n/Lly1OqVKlYlwXGp1KlSri6utKtWzcePnz4ku+MyDtkiEiSceDAAQMwDhw4YOkoL7Ry5UrD29s7zvOHDx82ChYsGOs5FxcXY9u2bcaZM2eMzJkzG4ZhGH///bfx/vvvG506dTIWLFhg3L9/P979fPXVV0aaNGmMwYMHG3fv3k3ol2EMGDDA+Oqrr57ZfubMGcPNzc04ceJErOe3bt1q5MmTx4iKioqzztmzZ4106dIZt27dMj/3xRdfGPnz5zcuXrxoxMTEGN9++63RoEGDePd58eJFIzo62jAMw1iyZImRO3duIzQ09DVe3buXlD7DIiKJWVL9PgUMDw8PY9CgQc9dztXV1ciVK5exYMEC4/HjxwmeY/bs2YadnZ1x9epV83PP618zZsxofPrpp0ZUVJRx48YNw8nJyVixYkW82z537pxhGIbx4MEDo0WLFkanTp0SPP/blFQ/W/JqNCJARCzOZDLFea5w4cKEhIRQvXp1duzYgbOzM3fu3ImzXLdu3RgwYAC//vorfn5+BAYGcvPmzVjLvI0RAf8qVKgQZcuWZeXKlbGenzp1Kq1bt4532OD06dPx8/MjW7Zs5ucKFCiAj48P+fLlw2Qy0aJFC3bv3h3vPvPly4eV1ZOv73r16pEpUyZOnjz5Rq9DRETkXfH19WXdunXcv3//mctMmTKFJk2a0Lt3b5o3b87SpUt59OhRrGVed0TAwoUL+e6771i3bh25cuUyP/+8/rVAgQI0bdoUa2trbG1tqVGjxjP76QIFCgCQIUMGOnfurMkCJVFSIUBE3govLy9OnTpl7vxiYmK4ffs29vb2xMTEsG7dOgB27tzJ1atXcXNzi7X+xYsXMZlM1KlTh1GjRmEYBhcuXIiznwIFCtC7d2+Cg4P58ccfOXPmDOXKlePjjz82d94+Pj4EBQXF+zdkyJBXfm0hISHmf9+4cYONGzfi4uJifu7+/fssXryYdu3axVk3JiaG6dOnx7lkoFGjRuzbt898ULR69WpcXV3j3f/FixfN/969eze3bt2iaNGir/w6RERELKF3797Ur1+fypUrc+vWrXiXKVWqFOPGjePUqVN06NCBFStWYGdnR+vWrc0Fge7duz+zf69Xr1682/3111/p27cv69evN/9g/9fz+tdmzZqxZs0a4MmcBZs3b463n75z5455zoOYmBgWLlwY7+WDIpamuwaIyFuRNWtWli5dytdff01oaChWVlZ8//331K5dmyV
LlvDll1/y9ddfkzZtWhYvXsx7770X60z+kSNH6NWrF4ZhEBUVRcuWLWP92I6Pi4sLLi4uDBs2jJ07d77RJD4bNmygdevW3L9/H8MwWLx4MRMmTKBOnToEBASwbds20qRJg2EY+Pv7U6VKFfO6CxYswMPDg2LFisXZ7vr167GysqJSpUqxnv+3oOHl5YWVlRX58uVj8uTJAFy+fJkaNWqYr1Vs06YN165dw9ramnTp0rFo0aJYExiKiIgkdv7+/mTIkAFfX1/Wrl0b78S6AFZWVlSuXJnKlSvz6NEjVq1ahWEYr73f5s2bkzt3bvz8/MzPbdiwgezZsz+3f+3WrRsdO3bEyckJk8lEgwYNaNiwIQCTJk3i8uXLDBo0iBMnTtCxY0dMJhNRUVG4u7sTEBDw2nlF3haT8Sb/J4nIO3Xw4EE8PDw4cOAA7u7ulo4j8sr0GRYRSRj6PpW3RZ+tlEGXBoiIiIiIiIikICoEiIiIiIiIiKQgKgSISJJUo0aNl5opv3///sydOzfB9rtnzx5cXV2xs7PD19eXS5cuPXf58PBwnJyc4kyGCGAYBr6+vmTJkiXB8omIiCQlib0/37dvH15eXqRPn566devGagsPD6dVq1Y4Ozvj7OxMnTp1uHHjhrn9yJEjVKxYEUdHRxwdHVmyZEmC5Rd5UyoEiEiStHr1auzt7V+43KBBg2jevHmC7DMmJobmzZszbtw4/vrrL2rUqIG/v/9z1+nZsyfly5ePt23s2LEUKVIkQbKJiIgkRYm9P8+TJw/jxo1j7NixcdoCAwMJCwvjyJEjHD16lFy5cjFy5EgAwsLC8PPzY/DgwRw/fpyjR4/i7e2dIPlFEoIKASKSaC1fvhxHR0dcXV3p2bMntra2nD17FoBChQqZZ9GvWLEi33zzDd7e3hQpUoT//e9/5m20adOGcePGJUieAwcOkCpVKnx8fADo2LEjK1asICIiIt7l169fz6VLl+I9cDl27BjLli3j22+/TZBsIiIiiVVS7s/z589PmTJlsLGxidNmMpkICwsjMjKSqKgoHjx4QP78+QGYN28enp6eVKhQAQBra2ty5MiRIPlFEoJuHygiidL169dp164dO3bswMHBgenTpz/zXsMAf//9N5s2bSIyMhInJyd27dpFuXLlnruPxo0bP3M44ooVK3j//fdjPXf+/HkKFixofpwxY0YyZcrE5cuXKVy4cKxl7969S48ePVizZg0hISGx2iIjI/nss8+YOnXqG93iUEREJLFL6v3583Ts2JGdO3eSM2dOrK2tKVu2LF26dAEgJCQEGxsbatWqxcWLF3FxcWH06NEqBkiioREBIpIo7d69GxcXFxwcHABo3bo1adKkeebyjRs3JlWqVKRLlw43Nzf+/vvvF+5j4cKFBAUFxfv334OGV9WlSxd69+5Nzpw547R999131K9fH0dHxzfah4iISGKX1Pvz5/nzzz+JiYnh6tWrXLlyhSxZstC/f38AoqKiWL9+PYGBgRw6dIh8+fLRqVOnt5ZF5FVpRICIJAtp06Y1/9va2pqoqKgXrvOqZxAKFCjAuXPnzI9DQ0O5d+8eefPmjbP+9u3b2b59O9988w0RERHcvn0be3t7Tp48yZYtWzh//jw//fQTUVFR3L9/n0KFCrFv3z6dKRARkRQtsfXnzzN58mSaNWtmzty8eXOGDh1q3oePjw/58uUDoEWLFlSrVu2Vti/yNqkQICKJkqenJ4cPH+bkyZPY29szZ84cHj9+nKD7WLhw4Sst7+HhQWRkJJs2bcLHx4fAwEBq164d66DlX/9e+wiwefNm/P39zddAbtu2LdZybm5usZYXERFJLpJ6f/48hQsX5s8//6Rp06YArFq1CmdnZwAaNWrE1KlTuX//PpkyZWL16tW4urq+0vZF3iYVAkQkUcqZMydTpkyhbt262NjYUKVKFd577z2L3mrPysqKOXPm0LFjRyIiIsibNy+zZ882t9eoUYNBgwZRqlQpi2U
UERFJTJJ6f37y5EkqVapEWFgY4eHh5M+fn969e9O5c2cGDhxIhw4dzD/+HRwcCAwMBJ6MCOjduzdeXl5YWVmRL18+Jk+ebJHXKxIfk2EYhqVDiMjLOXjwIB4eHhw4cAB3d3dLx3nrQkNDyZgxIwDLli2jV69eHD9+3MKp5E2ktM+wiMjbkpS+T9WfJy1J6bMlr08jAkQk0Ro/fjwLFy4kOjqaTJkyMXfuXEtHEhERkVek/lwk8VEhQEQSrd69e9O7d29LxxAREZE3oP5cJPHR7QNFREREREREUhAVAkQkWRo4cCD+/v4WzXDkyBEqVqyIo6Mjjo6OLFmyBHhyF4F/74/87194eDgAMTExfPPNNzg7O+Pg4ED79u1jza78ww8/4OTkhJubG56enuzdu9cir01ERMQSLN2/P68Ph2f3/c/r39euXRtre3nz5tW1+fLW6dIAEZG3ICwsDD8/P2bNmkWFChWIjo7m9u3b5nZ7e3vz7QSfNnXqVA4ePMjBgwdJnTo1HTp0ICAggO7duxMUFMSECRM4duwY7733HnPmzKFLly4qBoiIiLxDz+rDn9f3P69/r1atGtWqVTNvp1atWvj4+LyrlyMplEYEiMhbEx4eTuPGjXFycsLV1ZWqVasCcPXqVXx8fPDw8KB48eJ06dKFmJgYAGbMmEHlypVp2rQpTk5OeHl5ERISQr169XB0dKRq1ao8ePAAeHJWoEGDBvj6+uLg4EDt2rW5detWvFlGjRpFmTJlcHd3p3r16pw7dw6AFStW4OLigpubG87OzixfvjxBXvu8efPw9PSkQoUKAFhbW5MjR44XrhccHEzlypVJkyYNJpOJjz/+2HxLI5PJRGRkJA8fPgTg7t275M+fP0HyioiIvKyU3L8/z/P6/uf170+7fPkyGzZsoGXLlm89r6RsKgSIyFuzZs0a7t69S0hICMHBwSxYsACALFmysGLFCg4cOMDhw4c5e/Ysv/76q3m9ffv28cMPPxASEkKRIkWoXbs2kyZN4vjx46RJk4aZM2eal922bRvz5s3jxIkTvP/++/Tq1StOjnnz5nHy5El27drFwYMHad68OZ07dwagb9++BAYGEhQUxOHDh/noo4/irB8aGhpryN7Tf09X8J8WEhKCjY0NtWrVws3NjVatWnHjxg1z+99//427uzulS5dmwoQJ5uc9PDz4/fffuX//PpGRkfz666+cPXsWAFdXV7p27coHH3xA/vz5GTt2LOPHj3+F/yIiIiJvLiX37/DsPvx5ff/z+venzZgxgxo1apAzZ84X/FcQeTO6NEBE3hpXV1eOHz9O586d+eijj6hRowbw5Dq5nj17sn37dgzD4Pr16zg7O9OkSRMAypUrR4ECBQAoVaoUkZGR5MqVC4DSpUtz6tQp8z5q1qxJ7ty5AejQoQP169ePk2PZsmXs27cPDw8PAKKjo81tlSpV4quvvuKTTz6hatWquLm5xVk/Y8aM8Q4BfJ6oqCjWr1/P7t27yZs3L71796ZTp04sXrwYd3d3Ll68SObMmbl48SI1atTA1taWRo0a0aZNG86dO8dHH31EunTpqFy5Mn/++ScAZ86cYcmSJZw+fZq8efPy008/0bhxY7Zv3/5K2URERN5ESu7fn9eHP6/vf17//i/DMJg2bRo//vjjK2USeR0aESAib03hwoUJCQmhevXq7NixA2dnZ+7cucOYMWO4fv06e/bs4fDhwzRr1oyIiAjzemnTpjX/29raOs7jqKioZ+7TZDLFec4wDHr16kVQUBBBQUEcOXKEI0eOADBmzBimT59O+vTpad26NSNGjIiz/uucMShQoAA+Pj7ky5cPk8lEixYt2L17NwCZMmUic+bMAOTPn5+mTZuybds2c/6BAwdy6NAhdu7ciZOTE8WLFwfgt99+o0SJEuTNmxeAtm3bsmPHjliTCYqIiLxtKbl/f14f/ry+/3n9+7+2bNlCRETEc0cjiCQUFQJE5K25ePEiJpOJOnXqMGr
UKAzD4MKFC9y5c4fcuXOTNm1arl69yqJFi157H6tXr+batWsATJkyhcqVK8dZpm7dukyaNMk8YU9kZCSHDh0C4MSJE+brGDt16mTusJ/27xmD+P7Wrl0bb65GjRqxb98+7t+/b87p6uoKwJUrV8zXTIaGhrJy5UpKliwJQEREBHfu3AHg5s2bDB8+nB49egBPDrx27NhhvoZy5cqV2NnZkSZNmtd450RERF5PSu7fn9eHP6/vf17//q+pU6fSpk0brK2tX+3NEnkNujRARN6aI0eO0KtXLwzDICoqipYtW+Li4mIeqle8eHHy5s0bb+f+sry9vWnWrBmXLl2iWLFizJgxI84yzZs359atW+YZeKOiomjXrh0lS5akd+/enDx5kjRp0pA+fXomTpz42lmeVqBAAXr37o2XlxdWVlbky5ePyZMnA0/O7E+cOJFUqVIRFRVFw4YNadu2LQD37t2jYsWKWFlZERMTw1dffUXt2rUBqFevHvv27aNUqVLY2NiQIUMG5s2blyB5RUREXlZK7t+f14c/r+9/Xv/+b/uSJUvMIxpE3jaTYRiGpUOIyMs5ePAgHh4eHDhwQPeX5cmswnfv3mXcuHGWjiIvSZ9hEZGEkZy/T9W/W1Zy/mzJ/9GlASIiIiIiIiIpiC4NEJEka+DAgZaOICIiIglM/bvI26cRASIiIiIiIiIpiAoBIiIiIiIiIimICgEi8kIDBw7E39/f0jEwmUyUKFGC1atXm5/bsmULpUuXpnjx4jg5ObFr164XtoWHh9OqVSucnZ1xdnamTp063Lhx44X7P3v2LBUrViRz5sy4ubm9dFtMTAzdunXDyckJFxcXfHx8OH36NABnzpzBw8MDNzc3nJ2dadiwofn2Qi+rTZs2mEwm7t69CzyZzfnpeyEXKlSIbNmyxVlv+vTpmEwmli1bZn6ubNmy5vWcnZ0xmUwcPnwYgO7du1OgQAHq1q37SvlERCTxS6x9/YIFC8x9krOzM6NHjzYv+7z+9Wn/7SdfNsO//eG2bdvMbZ988gl58+aNs73Lly9TrVo17O3tcXFxoUGDBrGOLQoVKoS9vb15mwsXLnxhjoiICOrWrYudnR2urq5UqVIl1uvbu3cvnp6elCxZEkdHR0aMGGFuO3XqFD4+Pri5ueHg4MDXX39tvu3h2LFjKVq0aJzjFUlhDBFJMg4cOGAAxoEDB97pfgcMGGB89dVX73Sf8QGMO3fumB9funTJKFiwoBESEmIYhmFERESY25/XNnbsWKNBgwZGTEyMYRiG8emnnxrdu3d/4f5v3bplbNu2zVi5cqXh6ur60m1Lly41ypQpYzx+/NgwDMP4/vvvjYYNG5pzhYWFmZf98ssvjS+//PKl3g/DMIzffvvN+PTTT+O8N0/7/PPPjS5dusR67syZM0a5cuUMT09PY+nSpfGut2jRIsPZ2TnWc9OnTzf8/PxeOt9/WeozLCKS3CT092li7eu3b99uXLlyxTAMw7h7965RpEgRY9OmTYZhPL9//dfL9JMvyvC0devWGdeuXYuzzNWrV41t27aZH3/zzTdG69atzY8LFixoHDp06KX2/6/w8HBj1apV5uOV8ePHGx999JG53dXV1Vi+fLlhGE+OQ3LkyGEcO3bMMAzD8PPzMwICAszbcXZ2NlatWmVed9OmTXGOV/6lvjpl0IgAkRRkyJAhdOnSxfz4wYMHZMuWjRs3bnDkyBEqVKiAu7s7Tk5ODB48ON5tzJgxI9YZ4ZUrV1KxYkXz49mzZ1O2bFnc3d358MMPCQ4OflsvhwkTJtCsWTMcHR0BsLGxIUuWLC9sM5lMhIWFERkZSVRUFA8ePCB//vwv3F+2bNmoUKECGTJkeKU2k8nEo0ePiIiIwDAM7t+/b96fjY0N6dKlAyA6OpqHDx9iMple6vVfu3aNoUOHMmbMmGcuExERwdy5c2nfvr35uZiYGD799FPGjx+PjY3NM9edOnVqrPVERCTxS259ffny5cmdOzc
AmTNnxsHBgbNnzwLP71/h5frJV1W5cmVy5swZ5/lcuXJRoUIF8+OyZcuac76utGnTUqNGDfNxgaenZ6xtPj0q4eHDh6RJk8Y8AtBkMnHv3j3gyUjIyMhI8uTJ80Z5JHnRXQNEUpBWrVrh4eHB6NGjsbGxYdGiRfj4+JAjRw7Spk3Lhg0bsLGxITw8HC8vLypXroynp+dLb3/Hjh3Mnz+frVu3YmNjw7Zt22jWrBnHjh2Ls2zXrl3ZtGlTvNsJDAykbNmyL9xfSEgIBQsWpHLlyty8eRNvb2+GDx9OhgwZntvWsWNHdu7cSc6cObG2tqZs2bKxDpoSWu3atdm0aRO5c+cmY8aM5MuXjy1btpjbHz9+TJkyZTh37hwuLi78/vvvL7Xdzz77jBEjRpAxY8ZnLrNkyRIKFy4ca/jfmDFjKF++PB4eHs9c78KFC2zZsoXZs2e/VBYREUkckltf/7SQkBB27drFpEmTgBf3ry/TTz5LpUqViIqKolKlSnz//ffxFvqfJTo6mp9++gk/P79Yz7dq1QrDMChTpgzDhw8nR44cr5QpICAg1janT5+On58fffv25caNGwQGBpqLJuPGjaN27dpMnDiRO3fu0K9fP0qWLPlK+5PkTSMCRFKQ999/n5IlS5p/aM6YMYO2bdsCT6rFn376KSVKlMDT05Nz584RFBT0Sttfvnw5wcHB5uvMv/jiC27fvk14eHicZceOHUtQUFC8fy97YBAVFcXWrVtZtGgR+/bt486dOwwYMOCFbX/++ScxMTFcvXqVK1eukCVLFvr37/9Kr/VV7N+/n6NHj3Lp0iUuX75MpUqV+N///mduT5MmDUFBQVy7dg0HBwcCAwNfuM0pU6ZQoEABfH19n7vcf8/qHz16lN9++42+ffs+d70ZM2ZQq1YtbG1tX5hFREQSj+TW1//r4sWL+Pn5MWnSJPNZ/+f1ry/bT8bn3LlzHDhwgJ07d3Ljxg26d+/+0usahkHnzp3JmjUrX331lfn5rVu3cvjwYQ4ePIitrS2tW7d+pUxDhw7l9OnTDBs2zPzc8OHDGTZsGOfPn+fYsWP06dOHkJAQ4MnIyKZNm3L58mXOnTvH3LlzWbdu3SvtU5I3FQJEUph27doxffp0/vnnH06fPk316tUB6N27N7a2thw6dIjg4GAqVqxIREREnPVTpUpFdHS0+fHTyxiGQevWrWN19FeuXDEPfX9a165dY01o9/Tfnj17Xuq1FChQgJo1a5I1a1ZSp05N06ZN2b179wvbJk+eTL169UibNi1p0qShefPmzzxjkRBmzZqFr68vWbJkwcrKitatW8e7vzRp0tC2bduXOgu/adMmli9fTqFChShUqBAALi4uHDp0yLzMmTNn2L17N82aNTM/t23bNs6ePUuxYsUoVKgQu3fvpkOHDkycONG8jGEYTJ8+XZcFiIgkUcmpr4cnE/FVrlyZvn370rBhQ/Pzz+tfX6affJYCBQoAkCFDBjp37hxrssAX+fLLL7lw4QILFy7Eyur/fmr9u83UqVPj7+//StscNWoUS5Ys4Y8//iB9+vQA3Lx5k6VLl5r7+MKFC+Pp6cmOHTsA+Pnnn83Fhpw5c1KjRg02b9780vuU5E+FAJEUpm7duuzbt49hw4bRokULUqV6coXQnTt3yJ8/P6lSpeLkyZPPrBoXLVqUw4cPEx4eTlRUFPPmzTO31alThzlz5nD+/HngybXo+/fvj3c7CXGWoFmzZmzatIlHjx4B8Mcff+Dq6vrCtsKFC/Pnn39iGAaGYbBq1SqcnZ0BuHTpEg4ODi+1/5dVuHBhNm7cyOPHj4En11r+u79z584RFhYGPHm/Fi1ahIuLi3ldBwcHLl26FGebc+fO5cKFC5w9e9Z8veDhw4djDfubNm0a9erVM8+NANCpUyeuXLliXs/T05PJkyfTqVMn8zIbN24kKiqKKlWqJNh7ICIi705y6uuvXLlCpUqV6NmzZ5yz6M/rX1/UT1aqVIm9e/f
G2d+dO3di9csLFy586SH1X375JadPn2bp0qWkSZPG/PzDhw9j3WFg/vz5sbbZqlUrli5dGu82x4wZw/z581m3bl2s/jxr1qxkyJCBjRs3Ak8KA3v27DG//sKFC7NmzRrz/jdt2mRuEwHNESCS4tjY2NCoUSMmTJjA8ePHzc/37duXli1bMnPmTIoUKfLMoXSenp7UqFEDZ2dn8uTJQ/ny5c1VfW9vb0aMGEG9evWIiori8ePH1KxZk1KlSr2V1+Ll5UWdOnUoWbIk1tbWFC9e3Hzd4PPaBg4cSIcOHcwd4tPD8S9dumQ+YPqvsLAw7OzsePToEffu3SN//vy0bNmSYcOGPbft888/5/jx47i6upI6dWpy585tznL48GH69OkDPDngcHd358cffwTg+vXr3Lp1K95b/71ITEwMM2bMYNasWa+87tSpU2nbtm2sMxkiIpJ0JKe+vn///pw/f56AgAACAgIA+Oqrr2jbtu1z+9fniY6OJjg4ON6Jgk+cOEHHjh0xmUxERUXh7u5u3i9AzZo1zZMjFi9enGLFirF582Z27NjB+PHjcXBwMBc5PvjgA5YuXcq1a9do0KAB0dHRGIZB4cKFY/XP+/fv58svv4yT5eLFi3z99dcULlwYHx8f4Ml/2z179mBtbc2vv/5K9+7diYqKIjIyEn9/f8qVKwfAzJkz6dKlCwEBATx+/Jg6derQpEmTl33bJQUwGYZhWDqEiLycgwcP4uHhwYEDB3B3d7d0nHfOZDJx586dWBXxhDZy5Ejy5MlDixYt3to+XtaiRYs4efLkC6/nf5dmzJjBsmXLWLZs2Wutn9I/wyIiCSW5fp++i75+3759BAYGMmXKlLe2j5d148YNmjVr9s6v39+8eTP+/v7xzhGRXD9bEptGBIhIkpErVy4++ugjhg0bRo0aNd7KPl5lQqC37enrIBOD7t278/vvv+Pl5WXpKCIikky9i76+dOnSlC5d+q1s+1XlyJHjnRcBxo4dyy+//ELevHnf6X4lcdG4T5FkrlChQq88I/C78PS9b//16NEj0qVLx8WLF83PVa5c2Xzv4qtXr7J27Vrq168f7+zEr6tNmzbky5ePkiVLUqxYMSpUqBBrwr79+/fTuHHj524jKCiIBQsWPHeZ33//na5duwJPKvFP39LvZd29e5fhw4e/8nr/KlWqlHmyoCNHjuDr64urqyvOzs6ULl2ao0ePAk/ek3HjxsVad+TIkZw8eZLp06e/9v5FRCTpsMQxxNWrVwkODn5uESC+Ywh4Mmotc+bMuLm54ezsjI+PD3/99dcrZ3jw4AEmk8n82M3NjdDQ0BeuU61aNWxtbeOMZnhe2/OOB54+/njT/v9pXbt2JSQkhPXr1yfI9iRpUiFARBINGxsbypUrZ/6h+vjxY86cOcOVK1fMMxZv2rSJsmXLxjs78bM8PfPxs3Tv3p1Dhw5x6tQpfvrpJwYPHsyYMWOAJz+eFy5c+Nz1X1QIiIqKok6dOowdO/alc8cnIQ8EmjZtyhdffEFwcDBHjx5lyZIl5MyZM0G2LSIiYgk+Pj4EBQVx9OhRPDw88Pf3f+NtBgUFkTFjxucukzp1anr27Bnvj+vntT3P08cfCdn/i4AKASLJxq5du6hQoQKurq64uLiwfPnyOMuMGTOG0qVL4+bmRunSpdm1axfwZGK5Ll264OjoiKurKx4eHkRERHDjxg2qVq1KiRIlcHFxMd+HOCEZhkHPnj2pU6cOYWFh+Pj4mAsBe/bsoXTp0pQtW9Z867/Nmzfj4+PD1atX8fHxwcPDg+LFi9OlSxdiYmKAJ2cEfHx8aNCgASVKlGDv3r1UrFiRL774gtKlS1O0aFG+/vprnjVFipubGwEBAfzwww8YhhGrWh/fe3L9+nX69+/Ppk2bcHNzM9/D2GQyMWDAAEqXLk2vXr2YMWMGdevWNe8nKiqKVq1a4ezsjIeHh/msy3/PDhw9etR866P//e9/hIaG4ubmZp6Y6erVqzR
q1IgyZcpQokSJWHMK7Ny503xmpG3btkRFRZnbLl68SL58+cyP33///ViFgOPHj1OpUiXs7OyoX7++eVbmgQMH0qhRI2rXro2dnR21atXi6NGjVKtWDTs7O5o2bWr+byEiIolfcjmG+K9KlSpx7tw58+PmzZtTqlQpXFxcqFmzJlevXjW3BQYGUqxYMUqWLBmnaP/vCIQbN26Yb70LsHjxYlxdXQkPD8fGxsZ8K8P/el4bvNzxQHz9/+DBg3F0dDTfkvHp1yryQoaIJBkHDhwwAOPAgQOxnr9165aRM2dOY+vWrYZhGEZ0dLRx69YtwzAMo2DBgsahQ4cMwzCM69evm9fZtWuXYW9vbxiGYRw8eNBwcHAwoqOjDcMwjLt37xrR0dHGmDFjjA4dOsTaT3z8/f0NV1fXeP92794d7zqAcfXqVaNx48ZG586djaioKMMwDGPbtm1G4cKFDcMwjO+++86YOHGiMWXKFKN///6GYRhGsWLFjC1bthjh4eFGaGioYRiGERUVZdSsWdOYP3++YRiGMX36dCNdunTGiRMnzPv76KOPDF9fX+Px48fGw4cPDQ8PD2Pu3LmGYRhG69atjbFjx8bKd/v2bQMwrl27ZmzatMlwdXU1DMN45nsyffp0w8/PL85r/O6778yPn15m06ZNBmCsX7/eMAzDWLhwoWFvb2/ExMTE2p9hGMaRI0eMggULGoZhGGfOnDEyZ84caz9Vq1Y1Nm/ebBiGYURGRhrVqlUzfv31V+PRo0dG/vz5jXXr1hmGYRhr1641AGPTpk2GYRjGqFGjjPTp0xu+vr5G7969jYMHD5q32bp1a6NMmTLGw4cPjaioKMPLy8uYN2+eYRiGMWDAAOODDz4wbt++bcTExBgffvihUbZsWeP+/ftGZGSk4erqaqxcudKIz7M+wyIi8moS6vs0OR1DPN3PRkdHG5999pkxaNAg83pPv4Zhw4YZHTt2NAzjST+bK1cu4/Lly4ZhGEavXr2Mp38mAcadO3cMwzCMrVu3GoULFzb27Nlj5MuXzzh58mSsbPH1089re9njgf+ue/v2bSNz5sxGWFiYYRiG8fDhQyM8PDze/b4q9dUpgyYLFEkGdu3ahb29Pd7e3gBYWVnFe8u5Q4cOMWTIEG7dumW+h3B4eDiFCxcmKiqKdu3a4ePjQ82aNbGyssLT05OxY8fy9ddf8+GHH1K9evV49/+6w91r1qyJn58f/fr1Mz9XpkwZrl69yoULF9i8eTMTJkwgVapUtG/fnsuXL3Pp0iU8PT2JioqiZ8+ebN++HcMwuH79Os7OzuZb43h5eWFvbx9rf61atSJ16tSkTp2aFi1asH79epo1axZvNuMZowVe9j35V7t27Z7ZVqhQISpVqgRAo0aN6NChAxcuXHju9v7r4cOHbNiwgWvXrpmfe/DgASdPnuTEiROkSpWKypUrA1C1alUKFy5sXu7rr7+mRYsWbNy4ka1bt+Lt7c3UqVPN1yPWq1eP9OnTA0/+u/z999/mdatWrUrWrFkBcHd3x8bGxjxssmTJkpw6deqVXoeIiFhGcjqGAMyj886fP0+2bNnMtz0EmDdvHrNnzyYiIoKIiAhsbW0B2LhxIx9//DF58uQBoFOnTgwbNize/Xp7e9O+fXu8vLyYNWsWdnZ2r5X/aa9zPJApUyaKFStGixYtqFq1KjVr1oz3dogiz6JLA0RSiMePH1O/fn1GjRrF0aNH2bp1K/Bkgr7MmTNz9OhRmjVrxokTJ3BxceH06dOUK1eOoKAgypYty5IlSyhdunS819t37drVPCztv39Pd8D/5evry7p167h//775uTRp0lC+fHnWrFnD2bNncXBwoGjRoly4cIHVq1fj5eVFmjRpGDNmDNevX2fPnj0cPnyYZs2amecRAHjvvfde+J48PRHQf+3bt4+cOXPGuWb+Zd+TV8nxdB6TyUSqVKlibfPp1/Vf/xYsdu/eTVBQEEFBQZw+ffqZtxz872vOlSsXTZs
2ZeLEifTt25e5c+ea29KmTWv+t7W1dazLCv7b9rxlRUQkaUsqxxDwf3MEXLx4ETs7Ozp37gzA9u3b+fHHH1m9ejVHjx5lzJgxz+xfn3d8AE+KIjly5Hjl4v3L+vd44Hmsra3ZvXs3/v7+XL9+HU9PT7Zt2/ZW8kjypEKASDLg5eXFqVOnzB1ATEwMt2/fjrVMREQEjx8/pkCBAgCMHz/e3Hbjxg0ePnxI1apVGTp0KIUKFSIkJIQzZ87w3nvv0ahRI8aPH89ff/3FgwcP4ux/7Nix5h+h//0rW7bsM3P37t2b+vXrU7lyZW7dumV+3sfHh5EjR1KmTBnzc56enowYMQIfHx8A7ty5Q+7cuUmbNi1Xr15l0aJFL3yf5syZQ2RkJOHh4cybN898pvy/Dh8+jL+/Pz179ozT9qz3JFOmTNy7d++FGZ529uxZNm3aBDy5zjBXrlzkz5+fwoULc+7cOW7cuAEQ6w4GmTJlIjw83Hy9/nvvvYePj0+sCYQuX77MxYsXcXBwICoqyryP9evXxzqrv3TpUiIjI4En1ycePnyYIkWKvNJrEBGRpC25HUP8K3369EyZMoXVq1dz6NAh7ty5Q8aMGcmePTuPHz8mMDDQvKyvry9r1qwxzxkwadKkZ+73p59+4s6dOwQHBxMYGMiOHTueuezLetbxwNP+2/+HhoZy7do1vL296devHxUqVODQoUNvnEVSDhUCRJKBrFmzsnTpUr799ltcXFxwd3eP0zFlypSJwYMHU6ZMGTw8PEiTJo257cKFC1SpUgUXFxecnZ1xdnbm448/ZvPmzXh4eODm5oaXlxcjR44kc+bMCZrd39+fzz77DF9fX3MH7OPjw6lTp8y3DQT46KOPOHXqFL6+vgB89dVX7Nmzh+LFi9OyZctn/qh/mqOjI+XLl6dEiRJ4e3ubLyOAJ7fGc3NzM5896NWrF926dYuzjWe9J5UqVeLRo0e4uLiYJwt8keLFizNjxgxKlCjBsGHDmD9/PiaTibx589KjRw/KlCmDp6dnrCGa2bJlo1WrVri4uJgnC5o7dy6nT5/G2dmZEiVKUL9+fW7dukWaNGlYuHAhXbt2pUSJEsybNw9XV1fztpYsWYKzszMuLi64urpiY2PDd99991LZRUQkeUhuxxBPy5s3L9988w39+/enevXq2Nvbmy+DeHpSXmdnZwYOHIi3tzclS5bExsYm3v0dPHiQUaNGMXfuXHLmzMmcOXNo2bKluRDh4uJCuXLluH//Pvnz56dly5bmdZ/X9qzjgaf9t/+/d+8e9evXN0/GGBkZSevWrd/k7ZQUxmQ860JYEUl0Dh48iIeHBwcOHMDd3d3ScZKUihUr4u/vH2vWfnn39BkWEUkY+j6Vt0WfrZRBIwJEREREREREUhDdNUBEUoTNmzdbOoKIiIiISKKgEQEiIiIiIiIiKYgKASIiIiIiIiIpiAoBIiIiIiIiIimICgEiIiIiIiIiKYgmCxRJgo4fP27pCCKvRZ9dEZGEpe9VSWj6TKUMKgSIJCG2trakT5+eFi1aWDqKyGtLnz49tra2lo4hIpKk6ZhA3ib11cmfyTAMw9IhROTlnT9/nps3b1o6xis5efIkX331FSaTiXHjxmFvb2/pSElWREQEAwYMYP369Xz11Ve0bNkSk8lk6VivxNbWlgIFClg6hohIkpcUjwle5PDhw7Rt25Y+ffpQv359S8d5pvHjxzNv3jx+++038ubNa+k4CU59dfKnQoCIvFUrV66kSZMmODg48PvvvyfLzvJdi4mJoV+/fgwdOpTPPvuMn3/+mdSpU1s6loiIyBuJiYmhTJkyxMTEsG/fPqytrS0d6ZkePHiAvb095cqVY/HixZaOI/LKNFmgiLwVhmEQEBCAn58fVapUYcuWLSoCJBArKyuGDBnC9OnTmTFjBjVq1ODu3buWjiUiIvJGpk+fzoEDBxg/fnyiLgIAvPfee4wYMYLffvuNDRs2WDqOyCvTiAARSXBRUVF
89dVXTJgwge7duzN8+HCsrFR3fBs2b95M/fr1yZ07NytXrqRw4cKWjiQiIvLK7t69i52dHdWqVWP27NmWjvNSDMPA29ubO3fuEBQUpNF5kqToyFxEEtT9+/epXbs2gYGBTJ48mREjRqgI8BZVrFiR3bt3ExkZSdmyZdm5c6elI4mIiLyygQMHEhYWxg8//GDpKC/NZDIxfvx4jh8/zoQJEywdR+SV6OhcRBLMuXPnKF++PLt27WLNmjV89tlnlo6UItjZ2bFr1y4cHR3x9fVl/vz5lo4kIiLy0o4dO8ZPP/1Ev379ktxlhCVLlqRDhw4MGDCA69evWzqOyEvTpQEikiD27NmDn58f6dOnZ9WqVTg6Olo6Uorz6NEjOnTowKxZs/juu+/o169fkrujgIiIpCyGYVClShXOnz/PkSNHsLGxsXSkV3bz5k3s7Oxo0KABv/zyi6XjiLwUjQgQkTe2ePFiKlasSJEiRdizZ4+KABZiY2PDjBkzGDx4MAMGDKBVq1Y8evTI0rFERESeacmSJWzYsIFx48YlySIAPLnV3vfff8/UqVPZv3+/peOIvBSNCBCR12YYBsOHD6d37940bdqUadOmkTZtWkvHEmDhwoW0bt2a0qVLs3TpUmxtbS0dSUREJJawsDAcHR0pUaIEK1eutHScNxIVFYW7uzsZMmRgx44dmh9JEj19QkXktTx+/Jh27drRu3dv+vfvz9y5c1UESEQaN27Mpk2bOHnyJJ6enpw4ccLSkURERGIZOXIkV69eZezYsZaO8sZSpUrF+PHj2b17N3PmzLF0HJEX0ogAEXllt2/fpn79+uzatYupU6fSokULS0eSZzhz5gy1atXi8uXL/Pbbb/j6+lo6koiICGfPnsXR0RF/f3+GDRtm6TgJpkmTJmzevJm//vqLTJkyWTqOyDNpRICIvJJTp05Rrlw5jh49yoYNG1QESOQ++OADdu7cSenSpalWrRrTpk2zdCQRERG++eYbsmXLRp8+fSwdJUGNHDmS+/fv8/3331s6ishzqRAgIi9t27ZteHp6YjKZ2LNnDxUqVLB0JHkJmTNnZtWqVbRv35727dvz7bffEhMTY+lYIiKSQm3YsIHffvuNESNG8N5771k6ToJ6//336d27N+PGjdNleZKo6dIAEXkps2fP5tNPP6V8+fL89ttvZM2a1dKR5BUZhsHYsWP55ptvqF+/PrNmzSJ9+vSWjiUiIilIZGQkbm5uZM2alW3btiXL29xGRETg5OREsWLFWLNmTbJ8jZL0aUSAiDxXTEwM/fr1o1WrVrRo0YI1a9aoCJBEmUwmunXrxtKlS/njjz+oWLEiV65csXQsERFJQSZMmMDx48cZP358sv2BnDZtWsaOHcuff/7JihUrLB1HJF4aESAizxQREUGbNm1YuHAhw4cPp0ePHsm2005pDh48SO3atbG2tmbVqlWUKFHC0pFERCSZu379OnZ2djRp0oRJkyZZOs5bZRgG1atX5/Tp0xw7dkx3VpJERyMCRCRe169fx9fXl99//53FixfTs2dPFQGSEXd3d/bu3YutrS3ly5fnjz/+sHQkERFJ5nr37o2VlRWDBw+2dJS3zmQyERAQwPnz5xk9erSl44jEoUKAiMQREhJC2bJlOXPmDFu2bKFBgwaWjiRvQb58+di6dSsVK1akVq1a/Pzzz5aOJCIiydS+ffuYNm0a33//Pba2tpaO8044ODjw1VdfMXToUC5cuGDpOCKx6NIAEYll3bp1fPLJJxQsWJCVK1dSoEABS0eStyw6Opru3bszduxYvvzyS8aMGYO1tbWlY4mISDIRExND+fLlefjwIQcPHiRVqlSWjvTO3L9/Hzs7O3x8fJg/f76l44iYaUSAiJgFBgby8ccfU758ebZv364iQAphbW3NmDFjmDBhAj///DN+fn6EhoZaOpaIiCQTc+bMYffu3YwfPz5FFQEAMmXKxPDhw1mwYAFbt261dBwRM40IEBGio6Pp0aM
HY8aMoUuXLowdOzbFddTyxNq1a2nUqBGFChVi5cqVvP/++5aOJCIiSdi/Z8QrVqzIggULLB3HImJiYvDy8iI8PJwDBw7oGEsSBY0IEEnhHj58SIMGDRg3bhw//vhjiqzWy/+pVq0aO3fu5N69e5QpU4b9+/dbOpKIiCRh33//Pffv32fkyJGWjmIxVlZWjB8/nsOHDzN58mRLxxEBNCJAJEW7dOkStWvX5tSpUyxYsICaNWtaOpIkEteuXcPPz4/Dhw8zZ84c6tevb+lIIiKSxJw4cYISJUowYMAA+vbta+k4Fte+fXuWLl3KqVOnyJ49u6XjSAqnQoBICnXo0CFq1aqFlZUVK1euxNXV1dKRJJEJDw+nTZs2/Prrr/zwww90795dt5AUEZGXYhgGH3/8MX/99RchISGkTZvW0pEs7vr16xQrVoxmzZoxceJES8eRFE6XBoikQCtWrMDb25s8efKwd+9eFQEkXunSpWP+/Pn06dOHnj170qFDByIjIy0dS0REkoAVK1awdu1axo4dqyLA/5czZ06+++47AgMDOXTokKXjSAqnEQEiKYhhGIwbN46vv/6aunXrMnv2bDJkyGDpWJIEzJw5k88++wxvb28WL15M1qxZLR1JREQSqYiICIoXL07RokVZs2aNRpM9JTIyEjc3N7Jly8bWrVv13ojFaESASAoRFRXF559/Trdu3ejevTuLFy9WEUBeWuvWrVm3bh1BQUF4eXnx999/WzqSiIgkUqNHj+b8+fMEBAToh+5/pE6dmoCAALZv3878+fMtHUdSMI0IEEkB7t27R6NGjdi4cSMTJ07k008/tXQkSaJOnTpFzZo1uXPnDsuWLaN8+fKWjiQiIonIhQsXcHBwoFOnTowaNcrScRKtBg0asHv3bk6ePMl7771n6TiSAmlEgEgyd/bsWcqXL8+ePXtYs2aNigDyRooVK8auXbtwcnLC19eXefPmWTqSiIgkIj169CBjxoz079/f0lEStdGjR3P79m2GDh1q6SiSQqkQIJKM7d69m7JlyxIeHs7u3bupVKmSpSNJMpA9e3bWrVtH06ZNad68Od999x0aXCYiIlu2bGHBggX88MMPZMqUydJxErVChQrRs2dPRo8ezenTpy0dR1IgXRogkkwtXLiQ1q1bU6pUKZYuXUqOHDksHUmSGcMwGDZsGH369KFZs2ZMnTpVM0OLiKRQUVFReHh4kC5dOnbu3ImVlc43vkhYWBiOjo64uLiwYsUKS8eRFEb/h4okM4ZhMGTIEJo0aUKDBg1Yv369igDyVphMJnr37s3ChQtZsmQJlStX5saNG5aOJSIiFjB58mSOHDnC+PHjVQR4SenTp2f06NGsXLmS1atXWzqOpDAaESCSjDx69IiOHTsyc+ZMBg4cSP/+/TVbr7wTu3fvxs/Pj/fee49Vq1bh4OBg6UgiIvKO3Lx5Ezs7O+rXr8+UKVMsHSdJMQyDypUrc+HCBY4cOYKNjY2lI0kKoXKdSDJx69Ytqlatyvz585kzZw4DBgxQEUDeGU9PT/bs2UO6dOnw9PRkw4YNlo4kIiLvSL9+/YiOjtbEd6/BZDIREBDAP//8Q0BAgKXjSAqiQoBIMvDXX3/h6elJSEgIGzdupHnz5paOJClQoUKF2LFjB2XLlqV69eo6KyQikgIcOnSIwMBAvvvuO3LmzGnpOEmSs7Mzn3/+Od9//z2XL1+2dBxJIXRpgEgSt2XLFurXr0/OnDlZuXIlRYoUsXQkSeGioqL44osvmDRpEt27d2f48OG6XlREJBkyDIMPP/yQ27dvExQUROrUqS0dKcm6c+cOdnZ2fPzxx8yaNcvScSQF0JGZSBI2c+ZMqlSpgpubGzt37lQRQBKFVKlSMWHCBMaOHcuoUaP45JNPCAsLs3QsERFJYPPnz2f79u0EBASoCPCGsmbNyrBhw5g9ezY7d+60dBxJATQiQCQJiomJoX///gwZMoT27dszceJEdcCSKK1YsYKmTZv
i6OjI77//Tp48eSwdSUREEsCDBw+wt7fH09OT3377zdJxkoXo6GjKli2LYRjs3bsXa2trS0eSZEwjAkSSmPDwcJo0acLQoUMZMWIEv/zyi4oAkmjVrl2bbdu2ceXKFcqUKUNwcLClI4mISAIYOnQot2/fZvTo0ZaOkmxYW1szfvx4Dh48yLRp0ywdR5I5jQgQSUKuXbuGn58fhw8fZs6cOdSvX9/SkUReyqVLl6hduzanTp1iwYIF1KxZ09KRRETkNZ0+fZrixYvTq1cvBg4caOk4yU7r1q1ZvXo1f/31F1mzZrV0HEmmVAgQSSKOHj1KrVq1ePToEStWrKBUqVKWjiTySh48eEDz5s1ZuXIl48aN44svvrB0JBEReQ21a9fm8OHDHD9+nPTp01s6TrJz5coV7OzsaNu2LT/++KOl40gypUsDRJKAtWvX4uXlRebMmdm7d6+KAJIkvffeeyxZsoSuXbvy5Zdf0qVLF6KioiwdS0REXsHq1atZuXIlo0ePVhHgLcmTJw/9+/dnwoQJHDlyxNJxJJnSiACRRG7ixIl88cUXVKtWjQULFpAxY0ZLRxJ5Y4GBgXz++edUrVqVBQsWkClTJktHEhGRF3j06BElSpTg/fffZ/369ZhMJktHSrYeP35MiRIlyJs3Lxs3btR7LQlOIwJEEqno6Gi6du1K586d+fzzz1m+fLmKAJJsdOzYkT/++IMdO3ZQoUIFzp8/b+lIIiLyAgEBAfzzzz8EBAToh+lbliZNGgICAti8eTOLFy+2dBxJhjQiQCQRevDgAc2aNWPVqlUEBATQpUsXS0cSeStCQkKoWbMm4eHhrFixgtKlS1s6koiIxOPy5cvY29vTrl07AgICLB0nxahTpw5BQUGcOHFCl2JIgtKIAJFE5uLFi3h7e7Np0yZWrFihIoAka05OTuzZs4cPPviAjz76SPeiFhFJpHr27EnatGn57rvvLB0lRRk7dizXrl1j+PDhlo4iyYwKASKJyIEDByhbtiy3bt1ix44d1KhRw9KRRN66nDlzsnHjRurUqcMnn3zC8OHD0WA1EZHEY8eOHcyZM4dhw4aRJUsWS8dJUYoUKcI333zDiBEjOHPmjKXjSDKiSwNEEonly5fTrFkzihcvzu+//07u3LktHUnknYqJiWHgwIF8//33tGvXjokTJ5ImTRpLxxIRSdGio6MpU6YMJpOJPXv2YG1tbelIKc6DBw9wcHCgTJkyLFmyxNJxJJnQiAARCzMMg9GjR1OvXj0+/vhjNm/erCKApEhWVlYMGjSImTNnMnv2bKpXr86dO3csHUtEJEWbNm0aBw8eZPz48SoCWMh7773HyJEjWbp0KevWrbN0HEkmNCJAxIIiIyPp0qULkydP5ttvv2XIkCFYWak+J7J161bq1auHra0tq1atomjRopaOJCKS4ty5cwc7Oztq1KjBzJkzLR0nRTMMg48++oibN28SHBxM6tSpLR1Jkjj94hCxkLt371KjRg2mTZvG1KlTGTZsmIoAIv/fhx9+yJ49ewAoW7Ys27Zts3AiEZGUZ8CAAURERGiiukTAZDLx448/cvLkSX766SdLx5FkQL86RCzgzJkzeHl5sX//fv7880/atWtn6UgiiU7RokXZtWsXLi4uVK5cmTlz5lg6kohIinHkyBEmTJhA//79yZMnj6XjCODm5kbHjh0ZOHAg165ds3QcSeJ0aYDIO7Zr1y78/PzIlCkTq1atwt7e3tKRRBK1x48f07FjR2bMmEG/fv347rvvMJlMlo4lIpJsGYaBr68vly9f5siRI5q4NRG5desWdnZ2+Pn5MW3aNEvHkSRMIwJE3qEFCxbg4+ODvb09u3fvVhFA5CWkSZOGadOmMWzYML7//nuaN29ORESEpWOJiCQr0dHRDBw4kNDQUBYvXszmzZsJCAhQESCRyZ49O4MHD2b69Ons3buXCxcuMGbMGEvHkiRIIwJE3pL79+9z/fp1ihYtimEYDB48mP79+9O
iRQumTJmCjY2NpSOKJDmLFy+mZcuWlCxZkmXLlpEzZ04ePnzIxYsXVVgTEXkDJ0+exMHBgT///JP27dvj5ubG77//bulYEo/o6Gg8PDywsbGhc+fOtGnThoiICB1byivRiACRt+TTTz+lY8eOPHr0iNatW9O/f38GDRrErFmz9EUt8po++eQTtmzZwj///IOnpychISEsWbKEkiVLcuvWLUvHExFJsm7fvg3Ab7/9xrVr1xg0aBBbtmyxcCr5L8Mw2LBhA6NHj2bv3r0cPHgQQLfblVemQoDIW3DhwgWWLFlClSpVqFKlCr/++ivz5s2jX79+urZZ5A2VKVOGPXv2kCFDBry8vEiXLh0xMTFMmTLF0tFERJKsf39ITp8+nbp161K7dm0++eQTIiMjLZxMnnb9+nVq167Nl19+SZUqVZg1axagQoC8OhUCRN6CwMBA0qZNy+TJkzl+/DgbN26kadOmlo4lkmwULFiQHTt2UK5cOZo0aYK7uzsTJkwgOjra0tFERJKkf39IWllZ8euvv1K8eHH279+v+9UnMrly5WLfvn2kT5+e9evXExoaCqgQIK9OhQCRBBYREcHPP//M48ePefToEVWqVOHTTz/lu+++s3Q0kWQjODgYd3d3bGxs8PT0ZNeuXZw/f57ly5dbOpqISJK0b98+AKytrZkxYwZ//PEHBQsWtHAqiY+Liwu7du1ixIgR5pGmISEhFk4lSY0KASIJbOTIkdy9e5fIyEguX77MwYMH8fb2pm7dupaOJpJs2NnZ0apVK+7evcv+/fvNz3ft2tWCqUREki4fHx/Kli3LqVOnaN26tS5lTORSpUrFN998Q3BwMK6urpQuXdrSkSSJ0V0DRBLYypUrGTlyJB07dsTX15fcuXNbOpJIshYREcHevXuZOnUqNjY2TJ482dKRRERERBI1FQJEREREREREUpBUlg6QXJw/f56bN29aOobIa7O1taVAgQKWjiGJiL7XJKnT91ryoO8ieRkJ8f+7PmvyMpJL36JCQAI4f/48jo6OhIWFWTqKyGtLnz49x48fTxZfbPLm9L0myYG+15I+fRfJy3rT/9/1WZOXlVz6FhUCEsDNmzcJCwtjzpw5ODo6WjqOyCs7fvw4LVq04ObNm0n+S00Shr7XJKnT91ryoO8ieRkJ8f+7PmvyMpJT36JCQAJydHTE3d3d0jFERBKMvtdEJDHQd5G8K/qsSUqh2weKiIiIiIiIpCAqBIiIiIiIiIikICoEiIiIiIiIiKQgKgRIgjGZTNy9e9di+x86dCj29vZYWVmxbNmyZy539uxZrK2tcXNzM//9/fff7y6oiMgr2rVrl/n7qnjx4nTs2JFHjx69sG3z5s2kS5cu1vddeHh4vPto06YN+fLlMy/XvXv3d/b6RBKzs2fPkiVLFkvHMPvtt98oUaIEzs7OODs7c/bsWQA+//zzWP+vp02blh9//BGA8PBwWrVqZV6nTp063LhxA4Dp06fHWs/W1pb69esDcObMGTw8PHBzc8PZ2ZmGDRty584dc5bz589Tu3Zt7O3tcXJyYvz48fFmHjhwIDly5DDvo3nz5m/xHUp+LH2MvXfvXjw9PSlZsiSOjo6MGDHC3BYWFkbTpk0pWrQodnZ2LF682Nx26tQpqlSpgqurK8WLF2fhwoXP3MepU6fw8vLCzs6O0qVLc+zYsbf6mkSTBUoicvfuXTJmzIi1tfVrrV+5cmWaNGlCu3btXrhsxowZCQoKeq39iEjSExUVRapUlu3ybt26Rfbs2V9rXVdXV/bt20fq1KmJiYmhQYMGTJgwga5duz63DcDe3v6lv++6d++Ov7//a2UUkReLjIwkPDycTJkyvdb6hw4dok+fPmzcuJG8efMSGhpqPm76+eefzctdvXqVDz74gEaNGgEQGBhIWFgYR44cwWQy8dlnnzFy5EhGjBhB27Ztadu2rXldZ2dn8w/1vHnzsn37dtKlSwfAV199xcCBAwkICMAwDOrVq8e
3335Lw4YNAbh27dozszdv3pxx48a91uuWN/Omx9gdOnRg0KBB1KlTh9u3b+Pg4ECtWrVwcnJi1KhR2NjYcPr0ac6cOUPZsmXx8fEhe/bstGnThrZt2/Lpp59y48YNSpUqRYUKFciXL1+cfXTs2JEOHTrQpk0bFi9eTJs2bdi3b9+bvnR5Do0IkARnGAY9e/akTp06L7wXa3h4OIsWLaJevXq4ubkRERHx2vstU6YMhQsXfu31RSR5MZlMDBgwgNKlS9OrVy+uX79O/fr1zWfSAgMDAfjzzz+pWrUqAPfv3yd16tRMnjwZgFmzZpmLi4MHD8bR0dF8RuvcuXMvzHDixAn69++Pg4MD06ZNe+3Xkj59elKnTg3A48ePCQ8Px2QyvbBNROLatWsXFSpUwNXVFRcXF5YvXw7A/v378fLywsXFhTJlyrBjx44464aHh9O4cWOcnJxwdXU1f3c8j2EYbNmyhY4dO1KkSBGCg4NfO/vo0aPp1q0befPmBZ6c2EifPn2c5WbOnEm1atXInTs38OT7MCwsjMjISKKionjw4AH58+ePs96ePXu4fv06derUAcDGxsZcBIiOjubhw4fm75cNGzZgY2NjLgIA5MqV67Vfm7yYpY6xnx6R8PDhQ9KkSUO2bNkAWLhwIf/73/8A+OCDD6hYsSJLly4FIDg4mBo1agCQI0cOXF1d4x0VcP36dfbv30+LFi0AaNCgARcuXOD06dOvnVleTCMCJEE9evSIpk2bkj17dpYuXRpv5TE6Opr169czb948tm7dSpUqVfjiiy+oWLEiVlZPalMjR45k7ty58e5jwIAB1KtX741yPnz4kNKlSxMdHU3dunXp06fPa1dJRSTxsra2Np9RaNy4Mfb29ixZsoTr16/j4eGBq6sr3t7eNGnShEePHrFp0yZKly7N+vXr6dChA+vWrePjjz/mzp07jBo1iitXrpAuXTrCwsLM31f/dfHiRRYsWMCCBQtIkyYNTZo0YfPmzeYD8tDQULy9veNdN1euXKxduzbetrNnz+Ln58fff/9NzZo16dy580u1/f3337i7u2NtbU3btm1jtf1XQEAA06ZNo0CBAgwePBg3N7fnvr8iSc3t27epW7cuixcvxtvbm5iYGO7evcvjx4+pX78+v/zyC9WqVWP79u00aNAgzg+RNWvWcPfuXUJCQszbe5ZDhw4xb948li5dirOzM02aNGHs2LHmH+6bNm0yj9z5r5o1azJkyJA4z4eEhFCoUCE++ugj7t+/T61atRg4cGCcY5hp06YxatQo8+OOHTuyc+dOcubMibW1NWXLlqVLly5xtj916lRatmxpLi7CkwJjmTJlOHfuHC4uLvz+++/mLDly5KBJkyacPHmSQoUKMXr06GeelFm0aBGbNm0ie/bs9OvXDx8fn2e+dxKXJY+xp0+fjp+fH3379uXGjRsEBgaa+7Tz589TsGBB87KFChXi/PnzAHh4eDBnzhx69OjBP//8w86dOylUqFCc7V+4cOH/sXffUVFd3d/Av0O1giIKKqKiInWGInZUiqhgb9ixJGqw+9g1aiyxa6xBo9HEEkvEBsbYsCtW7D0KqCAWmkgb2O8fvNwfIzC0gRmG/VnLtZy5bd9h3zNnzj33HNSsWVPouScSiWBqaoqwsDA0bNiwYB8Uyz9iRXbr1i0CQLdu3VJ2KEoFgBwdHWnBggVy15NIJGRkZER79+6llJQUhcfRtm1bOnToUK7Lk5KS6P3790RE9OnTJ3J3d6dly5YpPI7ShHOYfUsdcgIAhYeHC68NDAzo1atXwuvx48fTwoULiYjIxcWFzpw5Q2PHjqUjR45QgwYNKC0tjWrVqkUREREklUqpSZMm1LNnT/Lz85PZb1YHDx4kkUhEAwYMoNevXxfLecXHx1O3bt3or7/+ynNZbGwsxcTEEBFReHg42dra0r59+3Lc75s3bygtLY2IiPz9/cnY2Jji4+OL5RxKgjrkMFP83zEgIICcnZ2zvX/
v3j2qW7euzHtisZguXrxIr169In19fSIievnyJdWpU4d++OEH2rt3L8XFxeV4nAkTJpCOjg4tWrRIuAYVwdbWljw9Penr16/05csXcnV1pfXr18usc+HCBapZsyZJpVLhvSNHjlDv3r0pMTGRkpOTqX///jR79myZ7b58+UKVK1emhw8f5njs5ORkGjlypFBnWrVqFVWqVIkePHhARES//vorOTo65rhtRESEUOe7dOkSVa9eXaFlpCLyRJXLDGXXsb29vWn37t1ElHENmJiYCHlSqVIlevfunbDu1KlT6ccffyQiolevXlHPnj1JIpFQ165dqW/fvjRp0qRs+7958yaZm5vLvOfk5ERnzpxR2DkoiirnSUHxowFMoVxdXXHq1CnExcXlus7WrVvRr18/zJo1CwMHDsShQ4eEga0yrVixQmbgmqz/MrsbFZauri5q1KgBADAwMMDw4cNx8eLFIu2TMaaaKlWqlOuyrN3n3d3dcfr0aVy4cAFubm6wtbXFrl27ULVqVRgbG0NTUxPXrl3DxIkTERUVhebNm+dYbrRv3x6//fYbIiMj0blzZyxYsADPnj2TWSc+Pj7X8q1Dhw75Oqd+/frleEfn22V6enrQ19cHAJiYmKB///65lne1a9cW7hj16NEDenp6ePr0aZ7xMKaucnrExszMDI8ePULHjh1x+fJl2NjYyAyel2ny5MmYN28e9u/fj27dumHz5s34+PGjzDpBQUG5lgWzZ8/OMSZTU1P06tUL5cuXR8WKFdGzZ09cu3ZNZp1t27bBx8dH5o7xli1b0KNHD5QrVw46OjoYOHAggoKCZLY7cOAArK2tYWVlleOxdXR0MGzYMOzcuVOIxd7eHtbW1gCAwYMH4/bt20hNTc22rbGxsdDLoFWrVrC3t8fNmzdzPA7LmbLq2B8/fsShQ4cwYMAAABnXQPPmzYVHZ0xNTWUelXv9+jVMTU0BZPQOOHjwIEJCQnDkyBHExsYK+ZJVnTp1EBERAalUCiDjEYiwsDBhP6yYKLslQh2oU8tQUQCg6OhoWrNmDTk5OdHHjx/lrp+WlkanTp2iYcOGkampKQ0ZMoSSkpKKHEdePQLev38vtJImJSVR7969hZbLsopzmH1LHXIis0zK1LdvX5o1axYREUVFRVGdOnXo2rVrREQUHBxMdevWpTZt2hAR0YYNG6hu3bo0YcIEIiKKi4ujt2/fCvvy9vamtWvXyj3+u3fvaPXq1eTk5ESOjo507NixQp/L8+fPhXIrOTlZ5lzkLXv37p1wlz8uLo5atmxJ27Zty/EYWXs5XL16lapVq6bQO5klTR1ymCn+7/j582cyNjamCxcuEFFGXeTTp0+UnJxMderUoZMnTxIR0eXLl8nIyIji4+NlegSEh4fTly9fiIiEbe7evSv3mHfv3qXp06dTw4YNqWPHjvTkyZNCx797927y9vamtLQ0Sk1Npa5du9Ly5cuF5bGxsVSxYkV69uyZzHbjxo0jHx8fSk9Pp/T0dPrhhx9o5MiRMuu0bt2afvvtN5n3Xr9+TQkJCUSU8VlNnjyZBgwYQEQZPQjq169Pb968ISKi/fv3k5WVVY5xZy1fnj17RjVq1KCnT58W8lPIriz0CFBWHVsqlVLVqlWFu/MfPnygOnXq0JUrV4iIaN68eeTj40NERP/99x9Vr16dPnz4QEREkZGRwnfQiRMnqE6dOvT169ccj9O2bVvavn07EREdOHAg194lyqbKeVJQ3BCgAOqUEEWRtdK9ZcsWEovFFBERka9tk5KS6ODBg5SYmFjo4y9cuJBq165NOjo6VK1aNapduzZFRUUREdGPP/5Iv/76KxFldN21trYmsVhMVlZWNHbsWIU0QJRmnMPsW+qQE982BERGRlKPHj3IxsaGrK2tyc/PT1iWlpZGVapUoZ9++omIiJ4+fUoA6OjRo0SUUYlt1qwZ2djYkK2tLfXs2bNAP5KfPXtGFy9eLPS5bN68WabcGjdunFBeylu
2fv16srKyEpbNmzeP0tPTiYjo7du3JJFIhGO4ubmRjY0NSSQSat68OZ09e7bQ8aoCdchhVjx/x6tXr1LLli3J1taWJBKJcJ3fuHGDWrRoQba2tuTk5CRcs1kbAo4fP04SiUS4pjIb3fIjPT2dLl26RM+fPy907GlpafS///2PLCwsyNramkaPHk3JycnC8s2bNwsNmll9+vSJevXqRVZWVmRlZUU9e/YUfqwRET158oQqVaqU7VGHo0ePkq2tLdna2pK1tTUNHjxY5kfov//+K3wezs7OdO/ePWGZRCIRGlCHDBlC1tbWJJFIyMHBgQ4cOFDozyAnZaUhgEg5dexTp06Rg4MDicVisrS0pFWrVgnLvnz5Qn379iUzMzNq1KiRzONnv/32GzVo0IDMzc2pXbt2dP/+fWHZkSNHaMSIEcLrJ0+eUPPmzalRo0bk6Ogok0uqRJXzpKBEREQl2AFBLd2+fRuOjo64desWHBwclB0OYwXGOcy+xTnBSjvOYfXAf0eWH4rIE841lh/qlCc8RgBjjDHGGGOMMVaGcEMAY4wxxhhjjDFWhnBDACsST0/PfI0qPXfu3FznLC2M4OBgSCQSmJubw9XVFW/fvs113fPnz8PJyUkYCffq1asAgBs3bqBly5aoUKECunfvnuO2iYmJsLKy4rm0GStDVL1cO3ToEMRiMezs7GBlZYXZs2cj8ym/vXv3ws7ODjY2NrCxscGqVauybU9EcHV1RZUqVRQWO2Msg6qXH5lyqt+kp6dj8uTJsLKyglgshouLC168eCEsX7FiBWxsbGBlZYUePXogJiZGYfEzxVL1PMxPHZyVAOUOUaAe1GnQiNIgLS2NGjRoIAxktWLFCurdu3eO6759+5bq1q1Ljx49IqKMAVMyB1sJDw+n4OBg8vPzo27duuW4/bhx4+i7776TGVBLHXEOs29xTpSsgpRrcXFxwijMycnJ5OTkRP7+/kSUMT935gBSMTEx1KBBAwoKCpLZftWqVfTdd98Jg5+pK85h9cB/x7wVpPzIlFP95tChQ9S0aVNhFpKFCxdSnz59iIjo5MmTZGlpKQwmuHDhQvL19S2GsykcdR8ssDQoSB7mpw6uqtQpT7hHAMvTkSNHYGlpCYlEgunTp8PQ0BCvX78GkDE/aEhICACgXbt2mDJlCpydndGgQQOMHj1a2MfQoUPxyy+/KCSeW7duQUtLCy4uLgCAUaNG4dixY0hKSsq27qZNmzBgwABYWloCAHR1dYW7YCYmJmjatCl0dXVzPM7p06fx9u1bDBw4UCFxM8ZUR2ku1ypXrgwNjYyv76SkJCQnJwvznbdq1QrGxsYAAH19fVhYWAjnBQAPHz7E4cOHMWPGDIXEzVhZVJrLDyD3+o1IJEJycjKSkpJARIiLi4OJiQkA4O7du2jdujUqV64MIOOO886dOxUSPyuc0pyHedXBWcnQUnYATLVFRUVh+PDhuHz5MiwsLLB9+3Z8+vQp1/VfvnyJoKAgpKamCt3wW7RoIfcY3t7euXZfOnbsGOrUqSPzXlhYGOrWrSu8rly5MvT09PDu3TuYmZnJrPvo0SPUrVsX7u7u+PjxI5ydnbF06VJUrFhRbkwxMTGYNm0aTpw4gUePHsldlzFWupT2cg0Arly5glGjRuH58+f44Ycf0K1bt2zrPHr0CFevXoWfnx8AIDU1Fd9//z22bdsGTU1NufEzxnJW2ssPefWbLl26ICgoCMbGxqhcuTJq166N8+fPAwAcHR2xadMmREZGwsjICLt370Z8fDw+f/4MAwMDuefDFK+05yFTDdwQwOS6du0axGIxLCwsAAA+Pj4yLYnf8vb2hpaWFrS0tGBnZ4eXL1/mWdDs27dPoTFnJZVKceHCBZw+fRqVKlXCsGHDMG/ePKxcuVLudmPHjsWsWbNQo0YNbghgTM2U9nINAFq2bIn79+/jw4cP6NWrFy5evIg2bdoIy9+8eYNu3brBz89PuKP3008/oWfPnrC0tJT
pJcAYy7/SXn7Iq9/cvHkTDx48wNu3b6Gnp4cZM2Zg9OjR2LVrF1xcXDBlyhR07twZmpqa6NGjBwBAS4t/SihDac9Dphr46mUKVa5cOeH/mpqakEqleW5T0BZHU1NThIaGCq/j4+MRGxuLWrVqZdve1NQUdnZ2qFq1KgCgf//+WLJkSZ4xXbp0CZcuXcKUKVOQlJSEz58/o3HjxvkaeIUxpl5UrVzLqnr16vD09MSBAweEhoB3797B3d0dc+bMQZ8+fYR1z58/j7CwMGzYsAFSqRRxcXGoV68ebty4gerVq+d5ToyxglO18kNe/ebPP/+UGUjUx8cHHh4ewra+vr7w9fUFkPFD1MTEBHp6enmeD1M+VctDphq4IYDJ1bx5c9y7dw9Pnz5F48aNsWvXLqSkpCj0GAVtcXR0dERqaiqCgoLg4uKCzZs3o0uXLjKFXKYBAwZg+vTpSE5Ohq6uLv755x9IJJI8j5H1btm5c+cwceJE4VkrxljpVtrLtSdPnsDc3BwaGhqIj49HYGAghgwZAgCIiIiAm5sbpk+fDh8fH5ntLl68KPz/9evXsLOz454BjBVQaS8/5NVvzMzMcPz4cUyZMgU6OjoICAiAjY2NsH5ERARq1qyJr1+/Yu7cuZg2bVqhzo8VXWnPQ6YauCGAyVWjRg1s3boV3bt3h66uLtq3b49KlSopddopDQ0N7Nq1C6NGjUJSUhJq1aolM2CNp6cnFixYgCZNmqBly5bo2rUr7O3toampCWtra+F52adPn8LNzQ1fv35FYmIiTExMMGvWLKG1mzGmnkp7ubZv3z7s27cP2traSEtLQ+/evfHdd98ByJgKKiwsDGvXrsXatWsBABMmTMCwYcOUcl6MqZvSXn7IM2bMGDx+/BgSiQTa2towNjYW6kwA4OHhgfT0dKSkpGDw4MEYO3ZssZ0Tk6+05yHXwVWEsqctUAfqNI1ETjKniiHKmFrGwsJCidGw4qDuOcwKTt1zgss19afuOVxWqOLfkcsP1VMWpw/kPFSO0pYn8nCPAJan9evXY9++fUhLS4Oenh52796t7JAYY6xIuFxjjBUWlx9MFXAesqLihgCWp1mzZmHWrFnKDoMxxhSGyzXGWGFx+cFUAechKyoNZQfAGGOMMcYYY4yxksMNAaxEzZ8/HxMnTlTa8V+/fo127dpBX18fdnZ2MsvS09MxefJkWFlZQSwWw8XFBS9evAAA3L9/H23atIGFhQVsbGwwfPhwJCYmCtv+8ccfsLW1hZ2dHezt7XH8+PGSPC3GmBIpu1zLREQyU38BwL///gs7OzvhX61ateDg4AAgo1zLuqxevXowMDAQtn3+/DlatmwJc3NzODk54eHDhyV9SoyVKcouS+TVg7IaOnQoRCIRYmJihPdEIpFQD7Kzs5OZpYQpn7JzK1NO31MAsGzZMlhZWcHOzg7NmzfH9evXhWW55da7d+/QoUMHNG7cGGKxGL169cKHDx9K8nRKPW4IYGWKnp4eFi1ahD179mRbdvToUVy+fBl3797FvXv34ObmJnS5KleuHDZs2IAnT57g7t27SEhIwLJlywAAnz9/xrhx43Dq1CmEhIRg/fr1GDp0aEmeFmOMYc2aNWjQoIHMex06dEBISIjwz8HBAQMHDgQA2Nrayizr3LmzsAwARo0ahZEjR+LZs2eYPn06l2uMqTl59aBM/v7+0NbWznH7ixcvCuWJs7NzSYTMSpmcvqdCQkKwadMmXL9+HSEhIRg7dmy2GSlyyi1NTU38+OOPePr0Ke7duwczMzNMnTq1xM5FHXBDQBmUmJgIb29vWFlZQSKRwMPDAwAQGRkJFxcXODo6wtraGmPHjkV6ejoAYMeOHXB3d0f//v1hZWWFli1b4tGjR+jRowcsLS3h4eGBL1++AMhodezVqxdcXV1hYWGBLl264NOnTznGsnLlSjRt2hQODg7o2LEjQkNDAQDHjh2DWCyGnZ0dbGxscOTIEYWcu4GBAVq3bo2KFSt
mWyYSiZCcnIykpCQQEeLi4mBiYgIAaNSoEcRiMYCMgsfJyUmYizc9PR1EhPj4eABATEyMsB1jrGSU5XINAB4+fIjDhw9jxowZua7z7t07nDlzBoMHD862LCkpCbt378aIESMAAFFRUbh58yYGDRoEAOjVqxfCw8NzvDvImDopy2WJvHoQALx//x4///wzVq9erZDjlTVlObeA3L+nRCIRUlNTkZCQACD/9WgjIyO0bt1aeN2sWTOhbs7yhwcLLINOnDiBmJgYPHr0CEDGHW0AqFKlCo4dO4ZKlSohLS0N3bp1w/79+9GvXz8AwI0bN3D//n2Ymppi8ODB6NKlC65cuQIjIyN07twZf/zxB8aMGQMgo+Xu3r17MDY2hq+vL2bOnIktW7bIxLFnzx48ffoUV69ehaamJnbu3AlfX18EBgZizpw52Lx5M1q0aIH09HTExcVlO4/4+PhcW5yNjIzw77//Fuhz6dKlC4KCgmBsbIzKlSujdu3aOH/+fLb1EhISsHXrVixZsgQAYGhoCD8/Pzg4OMDAwACJiYk4ffp0gY7NGCuaslyupaam4vvvv8e2bdugqamZ62e0Y8cOeHp6okaNGtmW+fv7w8zMTHhkKjw8HDVr1oSWVkY1QSQSwdTUFGFhYWjYsGGux2CstCvLZUle9aDvv/8ey5cvR+XKlXPcr5ubG6RSKdzc3LBw4cIcb7qUZWU5t+R9T0kkEkyaNAn169eHgYEBdHV1ceHCBZl18sqttLQ0bNiwAd26dcv182fZcUNAGSSRSPD48WP4+vqibdu28PT0BJBxZ3v69Om4dOkSiAhRUVGwsbERCqIWLVrA1NQUANCkSROkpqbCyMgIAODk5ITnz58Lx/Dy8oKxsTEAYOTIkejZs2e2OA4fPowbN27A0dERQMZFnMnNzQ0TJkxA79694eHhke15fgCoXLkyQkJCiv6B/H83b97EgwcP8PbtW+jp6WHGjBkYPXo0du3aJayTkpICb29veHh4oEePHgCA2NhYrF27FtevX4elpSWOHTuGHj164PHjx9DR0VFYfIyx3JXlcu2nn35Cz549YWlpmevdECLC77//jnXr1uW4fNu2bUJvAMbKsrJclsirB23duhWmpqZwdXXNcdvQ0FCYmpoiISEBo0ePxtSpU7Fp06YCHV/dleXckvc99erVK/j7++PFixeoVasWNmzYAG9vb1y6dAlA3rlFRPD19UXVqlUxYcKEAsVV1nFDQBlkZmaGR48e4ezZszh9+jSmTZuGkJAQbNy4EVFRUQgODka5cuUwefJkJCUlCduVK1dO+L+mpma211KpNNdjikSibO8REWbOnImRI0dmW7Z69Wo8fPgQQUFB8PHxwcCBAzFt2jSZdRTdI+DPP/+UGcDEx8dH6LYFZLRment7o2bNmli7dq3w/qlTp1ClShVYWloCyGhRHz58OEJDQ9GoUaMCxcAYK5yyXK6dP38eYWFh2LBhA6RSKeLi4lCvXj3cuHED1atXF9ZJSkpChw4dsm3/6tUrXLt2DQcPHhTeq1OnDiIiIiCVSqGlpQUiQlhYmFAZZUxdleWyRF49KCgoCBcuXEBAQICwvlgsxpEjR2Bvby+UDRUrVoSvr2+OcZd1ZTm35H1PHTx4ELa2tqhVqxYAYNiwYRg3bhxSUlKgo6OTZ26NHz8e4eHhOHz4MDQ0+Kn3guCGgDLozZs3qFq1Krp27YqOHTvi8OHDCA8PR3R0NIyNjVGuXDlERkbiwIED6NWrV6GOcfz4cbx//x5GRkbYunUr3N3ds63TvXt3rFq1Cr1794aBgQFSU1Px4MED2Nvb48mTJ7C2toa1tTW0tLRw8uTJbNsrukeAmZkZjh8/jilTpkBHRwcBAQGwsbEBAEilUvTr1w8GBgbYsmWLTMFqZmaGkJAQREZGwtjYGFevXoVUKkWdOnUUFhtjTL6yXK5lHZ379evXsLOzy3bHZdu2bRg6dGiOjw78/vvv6NGjh8wozjVq1IC
DgwN27dqFoUOH4uDBgzAxMeHHApjaK8tlibx60O7du2XWFYlEuHfvHqpUqYLo6Gjo6uqiQoUKSE9Px759+2Bvb1+gY5cFZTm35H1PmZmZYfv27fjy5QsqVaqEgIAAmJubQ0dHJ8/cGj9+PF68eIHDhw9zL9xC4IaAMuj+/fuYOXMmiAhSqRSDBw+GWCwWugJZW1ujVq1aORYe+eXs7IwBAwbg7du3aNSoEXbs2JFtnYEDB+LTp09wcXEBkPFje/jw4bC3t8esWbPw9OlT6OjooEKFCvj1118LHUtWX79+hbm5OZKTkxEbGwsTExMMHjwYS5YswZgxY/D48WNIJBJoa2vD2NgYfn5+AIB9+/bB398fYrFYKIBatWqFjRs3wsHBAbNnz4arqyu0tbWhpaWF/fv3y7TYMsaKV1ku1/ISGxsLf39/3L9/P9uy9PR07NixA3/++We2ZZs3b8bQoUPx888/Q09PD9u3by+JcBlTqrJclsirB8nz5MkTjBo1CiKRCFKpFA4ODjI9J1mGspxb8vTo0QM3btxAkyZNoKuri4oVKwqze8nLrcuXL2P9+vWwsLBAs2bNAAD169fHoUOHij1mdSEiIlJ2EKXd7du34ejoiFu3bgnzM5dl8+fPR0xMDH755Rdlh8LyiXOYfYtzQhaXa6UP57B6ULe/I5clxUMReVLac41zq2SU9jzJih+kYIwxxhhjjDHGyhB+NIAp3Pz585UdAmOMKRSXa4wxReCyhBUXzi1WUNwjgDHGGGOMMcYYK0O4IYAxxhhjjDHGGCtDuCFATc2fPx8TJ05UdhgQiUSwtbXF8ePHAQB79+6FnZ0dbGxsYGNjg1WrVgnrpqenY8qUKbCxsYGFhQVGjBiBlJQUYXlAQAAsLCzQqFEj9OzZE3FxcfmKISwsDF26dEHjxo1hZWWF9evX57nsy5cv6NChAwwNDWWm1MqPZcuWwcrKCnZ2dmjevDmuX78OIGO0WDs7O+FfvXr1YGBgAAD49OmTzDJzc3NoaWnh8+fPADJGeDU2NlaJvyljJUVVy7FDhw5BLBbDzs4OVlZWmD17NjLH3V28eLHMtaynp4fJkycDyCjjJk+eDCsrK4jFYri4uODFixd5Hv/169fQ1NSU2e/Lly8BZJQrbdq0gYWFBWxsbDB8+HAkJiYCAN69e4cOHTqgcePGEIvF6NWrFz58+JCvc/bw8BDO0dnZGXfu3BGWHT9+HA4ODkJZ/scffwjLmjVrJsRoY2MjTDEGZMza0r9/fzRs2BDm5ub4+++/he2mTp0KU1NTdO/ePV/xMVYUqlq2yKsjbd++XaYMMDQ0RM+ePfM8hrzy49WrV3B0dBSO2adPH0RHRwvbFqbeJa9MAoA//vgDtra2sLOzg729vXDugPyypbTWg0pjrp07dw7ly5eXyZnMv2Fh8zCroUOHQiQSISYmJlt8mfvNOt3gzp07IZFIYGNjAzc3N4SFhQnL6tWrh8aNGwvb7du3L18xPH/+HC1btoS5uTmcnJzw8OFDYZmLiwsMDAzKxqCLxIrs1q1bBIBu3bql7FAE8+bNowkTJig7DAJA0dHRwutLly5RREQEERHFxMRQgwYNKCgoiIiItmzZQi4uLpScnEzp6en03Xff0fLly4mIKD4+nmrUqEGPHz8mIqIxY8bQlClT8jx+eno6OTg40P79+4X3IiMj81yWlJREZ86coTt37pC+vn6+z/fOnTtkampK8fHxRES0c+dOcnJyynHdMWPG0NixY3NctmLFCurcubPMe8X5N1XFHGbKpQo5oarlWFxcHKWlpRERUXJyMjk5OZG/v3+27ZKSksjAwIBu3rxJRESHDh2ipk2bUkpKChERLVy4kPr06ZPn8V+9epVrOfTs2TO6e/cuERFJpVLq27cvzZs3j4gyyrOLFy8K606ZMoV8fHzyPB4RyZyvv78/icViIsooN6tWrSoc89WrV6Srq0txcXHZ9nHgwAGysbERXv/000/C8f/77z+
qXr06ffz4UVi+fft26tatW77iyw9VyGFWdMXxd1TVskVeHelb1tbW9Pfff+d5DHnlR1JSEn39+lV4PX78eBo/fjwRFb7eJa9M+vTpE1WuXFk4x4sXL1L16tWJKH9li7y/myLyhHMtiIiIgoKCSCKR5Gu/+c3DTAcPHqTvvvsuWzzfvs70+PFjMjIyonfv3hFRRr3a09NTWF63bl26c+dOvo+fycXFhbZv305EGd9VTZo0kVnu4+NDa9asyXFbdfpu4R4BKm7x4sUYO3as8PrLly8wMDDAhw8fcP/+fbRu3RoODg6wsrLCokWLctzHjh07ZO6yBAQEoF27dsLrnTt3olmzZnBwcECbNm1w9+7d4jodtGrVCsbGxgAAfX19WFhY4PXr1wCAu3fvwt3dHTo6OhCJROjUqRN27twJAPjnn39gb28PCwsLAICvry/++uuvPI935swZ6Orqok+fPsJ7RkZGeS7T1dWFq6trgXsDiEQipKamIiEhAQAQExMDExOTbOslJSVh9+7dGDFiRI772bZtW67LGCtt1K0cq1y5MjQ0Mr4+k5KSkJycDJFIlG29w4cPo06dOnB0dASQUT4kJycjKSkJRIS4uLgcy4eCaNSoEcRiMQBAU1MTTk5OQplqZGSE1q1bC+s2a9ZMWJaXrGVfbGyszPllvZMTFxeHatWqQVdXN9s+vi3H9u3bh9GjRwPImOu5Xbt2PN8zKxJ1K1vk1ZGyCg4ORlRUFLp27Vqk4+nq6qJ8+fIAgLS0NCQkJAjXemHrXfLKpPT0dBAR4uPjAWSvI+W3bFGGsppr8hQ0D9+/f4+ff/4Zq1evzvcxHjx4ALFYjJo1awIAPD098c8//+DTp08FijWrqKgo3Lx5E4MGDQIA9OrVC+Hh4fnqoadueNYAFTdkyBA4Ojpi1apV0NXVxYEDB+Di4oLq1aujXLlywo/ZxMREtGzZEu7u7mjevHm+93/58mX89ddfuHDhAnR1dXHx4kUMGDBApotMpkmTJiEoKCjH/WzevBnNmjUr0Lk9evQIV69ehZ+fHwDA0dERmzdvxtixY1G+fHns379fKJTCwsJQt25dYdt69eohIiICUqkUWlq5p/GjR49QvXp19OvXD0+fPkW9evWwatUqmJmZyV1WWBKJBJMmTUL9+vVhYGAAXV1dXLhwIdt6/v7+MDMzg52dXbZlV65cQXR0NDp37lzoOBhTJepYjl25cgWjRo3C8+fP8cMPP6Bbt27Z1vn2h3CXLl0QFBQEY2NjVK5cGbVr18b58+fzdbyEhAQ4OTkhLS0N3bt3x+zZs6GpqZltna1bt2LJkiXZtk9LS8OGDRtyjDM3Q4YMET6rzO6kIpEI+/btQ8+ePVGxYkVER0fD398fOjo6MtuGh4fj/PnzQmMukHM5nrWLJ2MFpY5lS6Zv60hZbdu2DYMHD4a2tna+9iWv/EhJSUHTpk0RGhoKsViMo0ePAih8vevb42YtkwwNDeHn5wcHBwcYGBggMTERp0+fBpD/skVZylquvXz5Eg4ODtDU1MSwYcPg6+ubbbuC5uH333+P5cuXo3Llyjkud3Nzg1QqhZubGxYuXIiKFStCIpHg9u3bePbsGczNzbFr1y4QEUJDQ1GtWjUAGX8bIkLTpk2xdOlSVK9eXW4c4eHhqFmzppDHIpEIpqamCAsLQ8OGDfN1LuqCGwJUXJ06dWBvb4+jR4+iT58+2LFjB6ZOnQoASExMhK+vL0JCQqChoYHw8HCEhIQUqOA5cuQI7t69K1NofP78GYmJiUIrcaY1a9Yo5qQAvHnzBt26dYOfn5/QGjx06FCEhoaibdu2KF++PNzd3XHy5MkiHUcqleLs2bO4du0arK2t4efnh759++LmzZtylxXWq1ev4O/vjxcvXqBWrVrYsGEDvL29cenSJZn15N3x37ZtG4YMGZLvL1rGVJ06lmMtW7bE/fv38eHDB/Tq1QsXL15EmzZthOWhoaG4dOkS9u7dK7x38+ZNPHj
wAG/fvoWenh5mzJiB0aNHY9euXXKPVbNmTbx9+xY1atTA58+f4e3tjVWrVmHatGnCOikpKfD29oaHhwd69Oghsz0RwdfXF1WrVsWECRPyfY5//vkngIxneqdPn47jx49DKpVi0aJF8Pf3R5s2bXDjxg107doV9+/fh6GhobDtjh070LlzZ5n3GFM0dSxbgJzrSJkSEhKwd+9eXLt2LV/7yqv80NHRQUhICFJSUjBu3Dhs3rxZpmwprJzKpNjYWKxduxbXr1+HpaUljh07hh49euDx48fQ0NDIV9miLGUp1xwcHPDmzRvo6+vjzZs38PT0hKGhIfr27StsV9A83Lp1K0xNTeHq6prj8tDQUJiamiIhIQGjR4/G1KlTsWnTJjRq1Ah+fn4YMmQIpFIpvLy8UKVKFaGOfOHCBZiamiI1NRVz5syBj4+PzLgTTD5+NKAUGD58OLZv347//vsPL168QMeOHQEAs2bNgqGhIe7cuYO7d++iXbt2SEpKyra9lpYW0tLShNdZ1yEi+Pj4ICQkRPgXERGRrdABMlogsw4QkvVfcHBwvs/n3bt3cHd3x5w5c2S65YtEIsyfPx937tzBlStXYGVlBWtrawCAqakpQkNDhXVfv34t05qXG1NTU9jb2wv7GTx4MG7fvo3U1FS5ywrr4MGDsLW1Ra1atQAAw4YNw+XLl2UGPXz16hWuXbuGAQMGZNv+y5cv2L9/P4YPH17oGBhTRepWjmWqXr06PD09ceDAAZn3t2/fjm7dugkDggIZP6wzHznS0NCAj49Prnd1stLV1UWNGjUAAAYGBhg+fLjMQEqpqanw9vZGzZo1sXbt2mzbjx8/HuHh4di3b5/wSENBZMb56dMnhISE4N27d0Kjh5OTE0xMTGQGEyQibN++PVtjZ07luKmpaYHjYSwrdStbcqsjZTpw4ACsra1hZWWVr/3lVX5k0tHRwbBhw4RePIWtdwG5l0mnTp1ClSpVYGlpCSCjl1RcXBxCQ0PzVbYoW1nJNT09Pejr6wMATExM0L9//2w5U9A8DAoKwpEjR1CvXj3Uq1cPACAWi4W/b+Z3QcWKFeHr6ytzvN69e+PatWu4efMmfvjhByQmJgp37jO309bWxsSJE3PM7W/VqVNH6N0CZHz2YWFhZfL7iBsCSoHu3bvjxo0bWLJkCQYNGiQUwtHR0TAxMYGWlhaePn2KU6dO5bh9w4YNce/ePSQmJkIqlWLPnj3Csq5du2LXrl1C98z09PRc74ivWbNGpoDK+i+/3ZAiIiLg5uaG6dOnw8fHR2ZZUlKSMFrtx48fsXTpUqFVumPHjrh9+zaePHkCANi0aRP69esnbGthYYG3b99mO16nTp3w5s0bYdnx48dhaWkJbW1tucvyktvxzMzMcPnyZXz58gVAxvNf5ubmMl3bfv/9d/To0SPH8Qf27dsHiUQiPJPHmLpQp3LsyZMnSE9PBwDEx8cjMDBQeCY28/g5/RA2MzPD2bNnhYbBgIAA2NjYCMtzK1eioqKEBsrk5GT4+/vD3t4eQEavp379+sHAwABbtmzJNlbB+PHj8eLFCxw6dChbF9shQ4bk+Ix+TEwM3r17J7w+fPgwqlWrBgMDA6EC9fjxYwDAixcv8PLlSzRu3FhY/+zZs5BKpWjfvr3Mfvv06SN0PX316hXOnTvHswSwIlOnskVeHSlTbj0KC1N+hIaG4uvXr8K5HThwQCjLClvvklcmmZmZISQkBJGRkQCAq1evQiqVok6dOvkqW5StrORaRESEzHdcQECAkDOZCpqHu3fvRnh4OF6/fi089nvv3j3Y29sjOjpaJg/37dsnc7yIiAgAGY+4TZ8+HWPGjEGFChWQkJAgM/PAX3/9JbNdbt9xNWrUgIODg9Ab7+DBgzAxMSlzjwUA/GhAqaCrq4u+ffti06ZNQgEJAHPmzMHgwYPxxx9/oEGDBrl2t2nevDk8PT1hY2ODmjVrolWrVkKLobOzM5YvX44ePXpAKpUiJSU
FXl5eaNKkSbGcy9y5cxEWFoa1a9cKrcQTJkzAsGHDEBsbi3bt2kFDQwPp6emYMGECunTpAiBjcK6tW7eie/fukEqlMtPKREVF4dOnTzJ33jJVrFgRfn5+8PLyAhFBX19f6KorbxmQ0VL54cMHYUAvFxcX7Ny5U+7xevTogRs3bqBJkybQ1dVFxYoVZQr69PR07NixQ+hy+61t27bh+++/L+Sny5jqUqdybN++fdi3bx+0tbWRlpaG3r1747vvvhOWnz59GhoaGnBzc5PZbsyYMXj8+DEkEgm0tbVhbGws/DCWV65cunQJc+fOhaamJqRSKVxdXTF79mwhFn9/f4jFYqEC1KpVK2zcuBGXL1/G+vXrYWFhIVQO69evL1SMbt68ifHjx2c7XmxsLPr06YPExERoaGigevXqCAgIgEgkgpGREbZs2YK+ffsKZfWGDRtk7qRs27YNw4YNy9b7YOrUqRg+fDgaNGgATU1NbNiwQSW6/LLSTZ3KFnl1JAB4+vQpQkJCsnV9Lmz5ce/ePeH/6enpcHBwwLp16wAUvt4lr0xycHDA7Nmz4erqCm1tbWhpaWH//v0oV64cypUrl2fZomxlJdcOHjyIX3/9FVpaWpBKpejTp4+Qg0Dh8lCeJ0+eYNSoURCJRJBKpXBwcJDpSTJ8+HCEhoYiOTkZXl5e+PnnnwFkDD7Yq1cvpKWlgYhgZmYmU7/O7TsOyBhLYejQofj555+hp6eH7du3FyhmtaGMqQrUjTpNI6FoyGU6EEXav38/LVy4sFiPoczjZeLpA1lJ4pz4P+pYjkVFRZG7u3uJHS8/ePpAlhN1/jsqqmzhepDqTh+oKtTxe0yeon7HlZXpA7lHACtWRkZGaNu2LZYsWQJPT89iOUZOz9AVp5I+HgAMHDgQwcHBOY4rwBgrXupYjlWvXj3X7qvKMHXqVBw9ehQtW7ZUdiiMlRhFlS1cD2J5UcfvMXmK8h3n4uKC169fo1WrVgqOSvXwGAFKVK9ePYSEhCg7jGyyzuOaKTk5GeXLl8ebN2+E99zd3WXmP42MjES5cuWQmJgo897du3cLXegMHToUtWvXhr29PRo1aoTWrVvLTEd18+ZNeHt7y91HSEiITJf/nBw9ehSTJk0CAJw7dy7Haf3yEhMTg6VLlxZ4u0xNmjTBuXPnAAD379+Hq6srJBIJbGxs8OzZMxw+fBgLFizA0KFD8csvvxT6OIwVJ3Us12JiYnDt2jWFVZ64XMso15ycnPDgwQMAwIcPH/DDDz+U3e6ZrESoWvmUWUfy8vLKVj4BGbNv6Ovrw87ODjY2NnBxccGzZ88KfJwvX77IPKtvZ2eH+Pj4PLfp0KEDDA0Ns41pJG+ZvLIma9lW0LJl9+7dePHiBRYsWJDvbZSJcy1Dacy1oKAgvHr1qkw8qssNASxfdHV10aJFC6FCl5KSglevXiEiIkIY9TQoKAjNmjXLcYTT3GQdPTU3U6dOxZ07d/D8+XNs2LABixYtwurVqwFkVDL37dsnd/u8KsxSqRRdu3Yt8nQsRa0wZ9W/f3+MGzcOd+/exYMHD+Dv7y+M+ssYUwwu1/LG5RpjyuXi4oKQkBA8ePAAjo6OmDhxYpH3GRISkutc7pm0tbUxffp0nD59ukDL5MlatimybGGKwblW9nBDQAm4evUqWrduDYlEArFYjCNHjmRbZ/Xq1XBycoKdnR2cnJxw9epVABmDt4wdOxaWlpaQSCRwdHREUlISPnz4AA8PD9ja2kIsFssM4qEoRITp06eja9eu+Pr1K1xcXIQKc3BwMJycnNCsWTNhDtFz587BxcUFkZGRcHFxgaOjI6ytrTF27Fhh9NEdO3bAxcUFvXr1gq2tLa5fv4527dph3LhxcHJyQsOGDfG///0PRJRjTHZ2dli7di2WLVsGIpJpDczpM4mKisLcuXMRFBQEOzs7jB49GkDG3cF58+bByckJM2fOxI4dO2RGr5ZKpRgyZAhsbGzg6OgotOp+2/r44ME
DYRqU0aNHIz4+HnZ2dsLgLpGRkejbty+aNm0KW1tbzJkzR9j2ypUrQsvrsGHDhGlMgIx5XWvXri28rlOnjkyF+fHjx3Bzc4O5uTl69uwpjEI+f/589O3bF126dIG5uTk6d+6MBw8eoEOHDjA3N0f//v2FvwVjRcHlGpdrXK4xVaUu5dO33NzcZKb0GzhwIJo0aQKxWAwvLy9hNH4gYzC0Ro0awd7ePluDYGYPqQ8fPqBevXpCeff3339DIpEgMTERurq6wnSn35K3DMhfWZNT2bJo0SJYWloKU+FlPVdVxbnGuVaqKW94AvUhb9CIT58+UY0aNejChQtERJSWlkafPn0iIqK6devSnTt3iChjUItMV69epcaNGxMR0e3bt8nCwoLS0tKIiCgmJobS0tJo9erVNHLkSJnj5GTixIkkkUhy/Hft2rUctwFAkZGR5O3tTb6+viSVSomI6OLFi2RmZkZERD/99BP9+uuvtHXrVpo7dy4RETVq1IjOnz9PiYmJFB8fT0REUqmUvLy86K+//iKijMGgypcvT0+ePBGO17ZtW3J1daWUlBRKSEggR0dH2r17NxHlPFjH58+fCQC9f/+egoKCSCKREBHl+pnkNAAVAPrpp5+E11nXCQoKIgB0+vRpIiLat28fNW7cmNLT02WOR0R0//59qlu3LhERvXr1ivT19WWO4+HhQefOnSMiotTUVOrQoQPt37+fkpOTycTEhE6dOkVERP/++y8BoKCgICIiWrlyJVWoUIFcXV1p1qxZdPv2bWGfPj4+1LRpU0pISCCpVEotW7akPXv2EFHGQDr169enz58/U3p6OrVp04aaNWtGcXFxlJqaShKJhAICAuhb6jTwCVMMLte4XCPico0pX0H/jupUPmW9htPS0uj777+nBQsWCNtlPYclS5bQqFGjiCjjGjYyMqJ3794REdHMmTMpa5UfWQaOu3DhApmZmVFwcDDVrl2bnj59KhNbTmWAvGX5LWu+3fbz58+kr69PX79+JSKihIQESkxMzPG4OVHGYIGca5xrpR0PFljMrl69isaNG8PZ2RkAoKGhkeO0Gnfu3MHixYvx6dMnYR7SxMREmJmZQSqVYvjw4XBxcYGXlxc0NDTQvHlzrFmzBv/73//Qpk0bdOzYMcfjF7ZbqJeXF7p164Yff/xReK9p06aIjIxEeHg4zp07h02bNkFLSwsjRozAu3fv8PbtWzRv3hxSqRTTp0/HpUuXQESIioqCjY2NMP9sy5Yts80LO2TIEGhra0NbWxuDBg3C6dOncx0QhnK5q5bfzyTT8OHDc11Wr149Yeqvvn37YuTIkQgPD5e7v28lJCTgzJkzeP/+vfDely9f8PTpUzx58gRaWlpwd3cHAHh4eMDMzExY73//+x8GDRqEs2fP4sKFC3B2dsa2bduE55169OiBChUqAMj4u7x8+VLY1sPDA1WrVgUAODg4QFdXV+iWZW9vj+fPnxfoPBj7FpdrXK5xucZUlTqVTwCEnj9hYWEwMDAQppsDgD179mDnzp1ISkpCUlKSMB3n2bNn0alTJ9SsWRMA8MMPP2DJkiU5HtfZ2RkjRoxAy5Yt8eeff8Lc3LxQ8WdVmLJGT08PjRo1wqBBg+Dh4QEvLy+YmJgUOZbixLnGuVba8aMBKiAlJQU9e/bEypUr8eDBA1y4cAFAxkBW+vr6ePDgAQYMGIAnT55ALBbjxYsXaNGiBUJCQtCsWTP4+/vDyckpx+dSJ02aJHR7+fZf1gv8W66urjh16hTi4uKE93R0dNCqVSucOHECr1+/hoWFBRo2bIjw8HAcP34cLVu2hI6ODlavXo2oqCgEBwfj3r17GDBggPC8LQBUqlQpz88k60Aj37px4wZq1KiR7dnS/H4mBYkjazwikQhaWloy+8x6Xt/KrNhfu3YNISEhCAkJwYsXL2S60X57jKyMjIzQv39//Prrr5gzZw52794tLCtXrpzw/8y5gXNbJm9dxooLl2vZcbnG5Rp
TDaWlfAL+77ntN2/ewNzcHL6+vgCAS5cuYd26dTh+/DgePHiA1atX53rtyit7gIwfqtWrVy9ww2B+ZZY18mhqauLatWuYOHEioqKi0Lx5c1y8eLFY4ilJnGuyONdUCzcEFLOWLVvi+fPnQoKlp6fj8+fPMuskJSUhJSUFpqamAID169cLyz58+ICEhAR4eHjg559/Rr169fDo0SO8evUKlSpVQt++fbF+/Xo8e/YMX758yXb8NWvWCJW1b/81a9Ys17hnzZqFnj17wt3dHZ8+fRLed3FxwYoVK9C0aVPhvebNm2P58uVwcXEBAERHR8PY2BjlypVDZGQkDhw4kOfntGvXLqSmpiIxMRF79uwR7ih96969e5g4cSKmT5+ebVlun4menh5iY2PzjCGr169fIygoCEDGc0xGRkYwMTGBmZkZQkND8eHDBwCQGelbT08PiYmJwnOtlSpVgouLi8wAJe/evcObN29gYWEBqVQqHOP06dMyd78OHTqE1NRUABnPP927dw8NGjQo0DkwVly4XONyjcs1pqrUrXzKVKFCBWzduhXHjx/HnTt3EB0djcqVK6NatWpISUnB5s2bhXVdXV1x4sQJ4TluPz+/XI+7YcMGREdH4+7du9i8eTMuX76c67r5lVtZk9W3ZUt8fDzev38PZ2dn/Pjjj2jdujXu3LlT5FiKE+ca51ppxw0Bxaxq1ao4dOgQZsyYAbFYDAcHh2yJr6enh0WLFqFp06ZwdHSEjo6OsCw8PBzt27eHWCyGjY0NbGxs0KlTJ5w7dw6Ojo6ws7NDy5YtsWLFCujr6ys09okTJ+L777+Hq6urcIG7uLjg+fPnMtNrtW3bFs+fP4erqysAYMKECQgODoa1tTUGDx6ca+U3K0tLS7Rq1Qq2trZwdnYWutsCwIoVK2BnZye0Ts6cOROTJ0/Oto/cPhM3NzckJydDLBYLg2rlxdraGjt27ICtrS2WLFmCv/76CyKRCLVq1cK0adPQtGlTNG/eXKYLmIGBAYYMGQKxWCwMRpI53Y2NjQ1sbW3Rs2dPfPr0CTo6Oti3bx8mTZoEW1tb7NmzBxKJRNiXv78/bGxsIBaLIZFIoKuri59++ilfsTNW3Lhc43KNyzWmqtStfMqqVq1amDJlCubOnYuOHTuicePGQtf0rAN+2tjYYP78+XB2doa9vT10dXVzPN7t27excuVK7N69GzVq1MCuXbswePBg4cehWCxGixYtEBcXBxMTEwwePFjYVt6y3MqarL4tW2JjY9GzZ09hgLzU1FT4+PgU5eMsdpxrnGulnYhyezCR5dvt27fh6OiIW7duwcHBQdnhlDrt2rXDxIkTZUa3ZiWLc5h9i3OiaLhcUz7OYfXAf0eWH4rIE841lh/qlCfcI4AxxhhjjDHGGCtDeNYApnSZc3gzxpi64HKNMcYYY6qMewQwxhhjjDHGGGNlCDcEMMYYY4wxxhhjZQg3BDDGGGOMMcYYY2UINwQwxhhjjDHGGGNlCA8WqECPHz9WdgiMFQrnLssN5wYrrTh31Qv/PZk8iswPzjUmjzrlBzcEKIChoSEqVKiAQYMGKTsUxgqtQoUKMDQ0VHYYTEVwucbUAZdrpR+XRSy/inq9c66x/FKX7xYREZGyg1AHYWFh+Pjxo7LDKFX++OMPbNmyBWfPnoWurq5C9jl//nw8ePAAf//9t0L2V5YYGhrC1NRU2WEwFcLlWob4+Hj06NEDTZs2xc8//6zscHJ15coVjBs3DkuWLIGHh4eyw1EJXK6ph7JQFj148AA+Pj747bff4ODgoJB9BgQEYN68eTh58iSqVaumkH2qMkVc75xrhcO5VjpxQwBTmrZt20JfXx9Hjx5V2D7//vtv9OnTB//99x/q16+vsP0yxsqu//3vf/Dz88PTp09hYmKi7HDk6tatG+7cuYPHjx+jYsWKyg6HMZZP8+bNw7p16/DhwwdoaSmmw+6HDx9gZGSE33//HUOHDlXIPlnpx7nGMvFggUw
poqOjcfnyZXh5eSl0vx4eHtDS0kJgYKBC98sYK5seP36MdevWYfbs2SrfCAAAq1evRlRUFJYtW6bsUBhjBRAQEICOHTsq7IcZAFSvXh1NmzblOhGTwbnGMnFDAFOKkydPIi0tDZ6engrdr56eHtq0acMFEWOsyIgIEyZMQN26dTF58mRlh5MvDRo0wJQpU7B8+XL8999/yg6HMZYPERERuH37tsJvjgCAl5cXTp48idTUVIXvm5U+nGssK24IYEoRGBgIsViMOnXqKHzfXl5eCAoKQkJCgsL3zRgrO44cOYJTp05hzZo1KFeunLLDybeZM2fC0NAQ//vf/5QdCmMsH44fPw6RSISOHTsqfN9eXl6Ii4vDpUuXFL5vVvpwrrGsuCGAlbi0tDT8888/xdIaCWQURMnJyTh79myx7J8xpv4SExMxadIkdOzYEZ07d1Z2OAVSsWJFrFy5EocPH8bJkyeVHQ5jLA+BgYFo3rx5sYxCbm9vj5o1a3JPSQaAc43J4oYAVuKuX7+Ojx8/FltDgLm5ORo0aMAFEWOs0FatWoW3b9/il19+gUgkUnY4Bebt7Y02bdpgwoQJ3E2TMRWWnJyMU6dOFVudSCQSwdPTEwEBAcWyf1Z6cK6xb3FDACtxgYGBMDAwQPPmzYtl/yKRCF5eXggMDARPisEYK6iwsDD8/PPPmDhxIho3bqzscApFJBJh3bp1ePbsGdavX6/scBhjubh48SK+fPlSbD/OAKBz5854+vQpXr58WWzHYKqPc419ixsCWIkLDAxEp06doKmpWWzH8PLywps3b3Dv3r1iOwZjTD1NnToV+vr6mDNnjrJDKRKJRILRo0dj/vz5iIyMVHY4jLEcBAQEoHbt2pBIJMV2DHd3d+jo6HBPyTKOc419ixsCWIl6+/YtQkJCirU1EgDatm2LihUrckHEGCuQc+fOYf/+/Vi2bBn09PSUHU6RLViwANra2pg1a5ayQ2GM5SAwMBBeXl7F+ghSpUqV0LZtW64TlXGca+xb3BDAStTx48ehoaGBDh06FOtxdHV10b59ey6IGGP5JpVKMX78eLRo0QKDBg1SdjgKUa1aNSxevBjbt29HcHCwssNhjGXx7NkzvHjxothvjgAZPSXPnTuHL1++FPuxmOrhXGM54YYAVqICAwPRsmVLGBgYFPuxvLy8cO3aNXz69KnYj8UYK/38/Pzw4MEDrFu3Dhoa6vP1+P3330MikWDcuHFIT09XdjiMsf8vMDAQurq6cHNzK/ZjeXl5ISUlBWfOnCn2YzHVw7nGcqI+NR2m8pKSknD69OkSaY0EAE9PT6Snp+PEiRMlcjzGWOn18eNH/PjjjxgxYgSaNGmi7HAUSlNTE+vXr8eNGzfwxx9/KDscxtj/FxgYiHbt2qFixYrFfqyGDRvC3Nyce0qWUZxrLCfcEMBKzPnz55GQkFBiDQG1atWCvb09F0SMsTzNnj0bRISff/5Z2aEUC2dnZwwYMAAzZsxATEyMssNhrMyLj4/HhQsXSqxOBIBnVCqjONdYbrghgJWYwMBA1KlTBzY2NiV2TC8vL5w4cQJSqbTEjskYK11u376N3377DQsWLED16tWVHU6xWb58ORISErBgwQJlh8JYmXfq1CmkpqaW+I+zd+/eISQkpMSOyZSPc43lhhsCWIkgIgQGBqJz587FOlrpt7y8vBAdHY2rV6+W2DEZY6UHEWHcuHGwsrLCDz/8oOxwilXt2rUxe/ZsrF+/Ho8ePVJ2OIyVaQEBAbCwsICZmVmJHdPZ2RmVK1fmnpJlDOcayw03BLAS8fTpU/z3338l2hoJAE5OTqhevToXRIyxHO3evRtXrlzBunXroK2trexwit3kyZNRt25dTJgwgbtsMqYk6enpOH78ODp37lyix9XR0YGHhwfXicoQzjUmDzcEsBIRGBiIcuXKwcXFpUSPq6mpiU6dOnFBxBjLJj4+HtOmTUPv3r3h6uqq7HBKhK6uLn755RecPn0ahw8
fVnY4jJVJt2/fxvv370v85giQ0VMyODgYHz58KPFjs5LHucbk4YYAViICAwPh6uqKChUqlPixvby88ODBA4SFhZX4sRljqmvx4sWIjo7GypUrlR1KifLy8kKnTp0wefJkJCYmKjscxsqcwMBA6Ovro1WrViV+7E6dOoGIeEalMoJzjcnDDQGs2MXGxuLixYtKaY0EAA8PD2hqanKvAMaY4Pnz51i9ejVmzJiBunXrKjucEiUSifDLL7/g7du3Za4RhDFVEBgYCA8PD6U8jmRsbIwmTZpwnaiM4Fxj8nBDACt2J0+ehFQqVVpDQJUqVdC6dWsuiBhjgokTJ6JWrVqYNm2askNRCnNzc0yaNAlLlixBaGiossNhrMx4//49bty4obQ6EfB/MyqlpqYqLQZW/DjXWF64IYAVu8DAQNjY2Cj1rpuXlxfOnDmDr1+/Ki0GxphqCAwMxPHjx7F69WqUL19e2eEozZw5c6Cvr4+pU6cqOxTGyox//vkHIpEInTp1UloMnTt3RmxsLK5cuaK0GFjx41xjeeGGAFas0tPT8c8//yi1NRLIKIiSkpIQFBSk1DgYY8qVnJyMiRMnws3NDT169FB2OEpVuXJlLF++HAcOHOCykbESEhAQgKZNm6JGjRpKi8HBwQFGRkbcU1LNca6xvHBDACtWN2/eRFRUlNIbAiwsLFC/fn0uiBgr49asWYNXr15h3bp1EIlEyg5H6QYNGoQWLVpg/PjxkEqlyg6HMbWWkpKCkydPKr1OpKGhAU9PT64TqTHONZYf3BDAilVgYCCqVq2KFi1aKDUOkUgELy8vBAYG8tzZjJVRb9++xaJFizBu3DhYWVkpOxyVIBKJsH79ejx8+BC//vqrssNhTK1dunQJ8fHxSv9xBmQ8Mvno0SO8fv1a2aGwYsC5xvKDGwJYsQoMDESHDh2gpaWl7FDg5eWFsLAwPHz4UNmhMMaUYPr06ahQoQLmzZun7FBUiqOjI7777jvMnTuX53tmrBgFBgaiZs2asLe3V3YoaN++PbS1tflOrZriXGP5wQ0BrNhERETg1q1bKtEaCQDt2rVDhQoVuCBirAy6dOkSdu/ejaVLl6JKlSrKDkflLF68GAAwe/ZsJUfCmPoKDAyEp6enSjyWpKenB2dnZwQEBCg7FFYMONdYfnBDACs2x48fh0gkQseOHZUdCgCgXLlycHNz44YAxsqYtLQ0jBs3Dk2aNMHQoUOVHY5Kql69OhYsWICtW7fi1q1byg6HMbXz8uVLPH36VGVujgAZPSWDgoKQkJCg7FCYAnGusfzihgBWbAIDA9GiRQsYGhoqOxRB586dceXKFXz+/FnZoTDGSsjWrVsREhKCDRs2QEODv/Zy88MPP8Da2hrjxo3jsVQYU7DAwEDo6OjA3d1d2aEIOnfujOTkZJw9e1bZoTAF4lxj+cU1IlYskpOTcerUKZVqjQQAT09PpKWl4d9//1V2KIyxEvD582fMnj0bQ4cORbNmzZQdjkrT0tLCunXrcPXqVezatUvZ4TCmVgICAtC2bVtUrlxZ2aEIzM3N0bBhQ+4pqWY411h+cUMAKxYXL17Ely9fVK4hwMTEBBKJhAsixsqIuXPnIiUlBUuWLFF2KKWCi4sL+vTpg2nTpiE+Pl7Z4TCmFr58+YLz58+rXJ0IAM+opGY411hBcEMAKxaBgYEwMTGBWCxWdijZeHl54cSJE0hLS1N2KIyxYnTv3j38+uuvmDdvHoyNjZUdTqmxcuVKxMbGYtGiRcoOhTG1cPr0aaSkpKjsj7M3b97g/v37yg6FKQDnGisIbghgxUKVRiv9lpeXFz59+oTg4GBlh8IYKyZEhHHjxsHc3Bzjxo1TdjiliqmpKWbOnIk1a9bg6dOnyg6HsVIvMDBQ6Bqtatq0aYOKFStyT0k1wbnGCoIbApjCPXv2DM+fP1fJ1kgAaNasGapVq8YFEWNqbP/+/bhw4QLWrl0LHR0dZYdT6kyZMgW1a9fGxIkTuRs
nY0VARDh+/LjK1ol0dXXRvn17ntpNDXCusYLihgCmcIGBgdDV1YWbm5uyQ8mRpqYmOnbsyA0BjKmphIQETJkyBd26dYOHh4eywymVypcvj9WrV+PEiRNcVjJWBCEhIXj37p3K/jgDMkZ0v3btGj59+qTsUFgRcK6xguKGAKZwgYGBcHFxQcWKFZUdSq46d+6Mu3fv4s2bN8oOhTGmYEuXLsWHDx+wevVqZYdSqnXv3h3u7u6YOHEikpOTlR0OY6VSYGAgKleuDGdnZ2WHkitPT0+kp6fjxIkTyg6FFQHnGisobghgChUfH48LFy6odGskAHTo0AGampp8p4sxNfPff/9hxYoVmDp1KszMzJQdTqkmEomwbt06hIaGcqMKY4UUEBAADw8PlX5EqWbNmnBwcOA6USnHucYKihsCmEKdOnUKqampKt8QULVqVbRs2ZILIsbUzOTJk1G9enXMmDFD2aGoBUtLS4wbNw6LFi3iHlSMFdCHDx9w/fp1la8TAf83o5JUKlV2KKwQONdYYXBDAFOowMBAWFpaon79+soOJU9eXl44c+YMkpKSlB0KY0wB/v33Xxw5cgQrV65U6UeTSpt58+ahUqVKmD59urJDYaxU+eeff0BE6NSpk7JDyZOXlxeio6Nx7do1ZYfCCoFzjRUGNwQwhUlPT0dgYGCpaI0EMgqir1+/4ty5c8oOhTFWRCkpKZgwYQLatm2Lvn37KjsctaKvr4+lS5diz549uHjxorLDYazUCAwMRJMmTWBsbKzsUPLk5OSE6tWr84jupRTnGisMbghgCnP79m28f/8enTt3VnYo+WJtbQ1TU1N+PIAxNbB+/Xo8f/4c69atg0gkUnY4asfHxwdOTk4YN24c0tLSlB0OYyovNTUV//77b6m5OaKhoQFPT0+uE5VCnGussLghgClMYGAg9PX10bJlS2WHki8ikQidO3dGYGAgz5PNWCkWGRmJn376CT/88APEYrGyw1FLGhoaWL9+Pe7evYvffvtN2eEwpvKuXLmC2NjYUnNzBMjoKfngwQOEhYUpOxRWAJxrrLC4IYApTGBgIDp06ABtbW1lh5JvXl5eePXqFZ48eaLsUBhjhTRjxgzo6OhgwYIFyg5FrTVr1gzDhg3D7NmzeQ5oxvIQGBgIIyMjODg4KDuUfPPw8ICWlhbfqS1lONdYYXFDAFOI9+/f48aNG6WmW1ImFxcXlC9fngsixkqpa9eu4Y8//sDixYthYGCg7HDU3pIlSyCVSjF37lxlh8KYSgsMDISnpyc0NEpPVVtfXx+tW7fmOlEpw7nGCqv0ZAxTaf/88w9EIlGpGK00q/Lly8PV1ZUHLGGsFEpPT8e4ceNgb2+P7777TtnhlAlGRkaYN28e/Pz8cPfuXWWHw5hKevXqFR49elTqbo4A/zej0tevX5UdCssHzjVWFNwQwBQiICAATZs2RfXq1ZUdSoF5eXnh0qVLiImJUXYojLEC2L59O27evIn169dDU1NT2eGUGePGjUPjxo0xbtw4Hl+FsRwEBgZCW1sb7du3V3YoBebl5YWkpCQEBQUpOxSWD5xrrCi4IYAVWUpKCk6ePFkqWyOBjIIoLS0NJ0+eVHYojLF8iomJwcyZMzFw4EC0atVK2eGUKdra2li7di0uXryIffv2KTscxlROYGAgnJ2doaenp+xQCszCwgL169fnLtulBOcaKwpuCGBFdunSJcTHx5eq0UqzMjU1ha2tLRdEjJUiP/30E75+/Yply5YpO5QyqX379ujevTumTJmChIQEZYfDmMpISEhAUFBQqa0T8YxKpQfnGisqbghgRRYYGIhatWrBzs5O2aEUmpeXF/755x+kp6crOxTGWB4ePXqE9evXY86cOahdu7aywymzVq1ahY8fP2LJkiXKDoUxlXH27FkkJyeX2l6SQEadKCwsDA8fPlR2KEwOzjVWVNwQwIosc7RSkUik7FAKzcvLCx8+fMCNGzeUHQpjTA4iwvjx41G/fn1
MmjRJ2eGUaWZmZpg2bRpWrFiBly9fKjscxlRCYGAgGjZsCHNzc2WHUmht27ZFhQoVuKekiuNcY0XFDQGsSF6+fImnT5+W6tZIAGjevDmqVq3KBRFjKu7QoUM4c+YMfvnlF+jq6io7nDJvxowZMDIywuTJk5UdCmNKR0QIDAws9XWicuXKwd3dnetEKoxzjSkCNwSwIgkMDISOjg7c3d2VHUqRaGlpoWPHjjyNIGMqLDExEZMnT4anp2epr/yoiwoVKmDlypU4evQoTpw4oexwGFOqe/fu4c2bN2pRPnl5eeHKlSv4/PmzskNhOeBcY4rADQGsSAICAtC2bVtUqlRJ2aEUmZeXF+7cuYN3794pOxTGWA6WL1+Od+/e4ZdfflF2KCyLPn36oF27dpgwYQJSUlKUHQ5jShMYGIiKFSuiTZs2yg6lyDw9PZGWloZ///1X2aGwHHCuMUXghgBWaF++fMH58+dL7Wil3+rYsSM0NDRw/PhxzJs3D1OmTFF2SIyVedu3b8e9e/cQGhqKpUuXYvLkyWjUqJGyw2JZiEQirFu3Di9evMC6deuQlpaGefPm8WwCrExISkpC48aN8eDBAwQGBsLDw0MtHlsyMTGBnZ0dd9lWIZxrTNG4IYAV2unTp5GSkqIW3ZIWLFiA4OBgtGjRAoGBgTh69Cji4uKUHRZjZd6sWbNw5MgRTJkyBVWrVsXs2bOVHRLLga2tLXx9ffHTTz/hzp07WLBgAS5duqTssBgrdlpaWggNDcWxY8dw7do1dOzYETNnzsSFCxeUHVqReXl54cSJE0hLS1N2KAyca0zxuCGAFVpgYCAaN26MBg0aKDuUIgsPD0ePHj3QuHFjnDx5Eo8ePYJYLFZ2WIyVaUSEz58/IyoqCn///TeWLVuG4OBgJCcnKzs09o3g4GCMHTsWurq6WL16NQAgOjpayVExVvy0tLRgbW2Nf//9F+np6QgKCsKKFSuUHZZCeHl54dOnTwgODlZ2KAyca0zxuCGAFQoR4fjx42rRGwAANm7cCFdXV/z111/4+vUrUlJSuCGAMSVLTExESkoKDh06BDs7O/j5+aF9+/Z49OiRskNj3xg1ahRatGiBLl264K+//oKmpiY3BLAyQywW4/79+6hRowb27t2L33//XS2e3W7atCkMDQ25y7YK4VxjisQNAaxQQkJC8O7dO7VpCNDR0cHBgwfh4OAgvGdra6vEiBhjmT8k3759i4cPH+LDhw84f/487O3tlRwZ+9bp06fh6emJHTt2QE9PT+jNwVhZYG1tLfReWrt2LYYMGaLskBRCU1MTHTt25B9nKoRzjSkSNwSwAsms2AUEBKBy5cpo3bq1kiNSnAoVKiAgIACGhoYoV64cqlatquyQGCvTwsPDAWQMRjd58mTcvXtXLe58qCNDQ0Ps2rULAQEBKFeuHNLT03HlyhVlh8VYiTAxMQEAjBw5EuPHj1dyNIrl5eWFu3fvIjw8HAkJCUhKSlJ2SGUa5xpTJC1lB8BKD6lUChMTE/z9998IDAxEhw4doKOjo+ywFKpKlSp49OgRXr16pexQGCvzGjRoALFYjNWrV8PNzU3Z4bB88PLywvPnz9G7d294eHgoOxzGSoS3tzeqVq2qljnfoUMHaGpq4vjx4zhz5gyqVq2KzZs3KzusMotzjSmSiIhI2UGw0qNWrVro378/1qxZg02bNiEwMBC9e/eGj4+PskNjjDHGGGMK0q1bN7Rv3x4HDhyAvr4+bt++DR8fHyxevFjZoTE1w7mmHPxoACsQsViMoKAgEBH27t2Lc+fOwc7OTtlhMcYYY4wxBZJIJBg3bhxq1KiB06dP4+3btzyQMisWnGvKwY8GlAFhYWH4+PGjQvZlZGSE8+fPo3Llyrhy5QrWr1+PtLQ03L59WyH7ZywnhoaGMDU1LfT2irwGGJMnIiICMTExxbLvKlWqoGbNmsWyb8ayMjQ0BAAuN8u4bt264eHDh/D390d6ejoAKOzHGX8vs6w
yc+3gwYPI7KyeWQ6x4sOPBqi5sLAwWFpa4uvXr8oOhbFCq1ChAh4/flyoxgC+BlhJ0tDQECrMjJVW5cqVg0gkQmJiorJDYSpAU1MTaWlp0NTURFJSErS0inYfkb+XWX6UL18eT548KdKNICYf9whQcx8/fsTXr1+xa9cuWFpaKjscxgrs8ePHGDRoED5+/FioLwO+BlhJycxVzjVWmmXmMQDOZSbkg6mpqUIaAQD+Xma5S01NRd++fRETE4O4uLhC1/1Y/nBDQBlhaWkJBwcHZYfBmNLwNcBKCucaUxecyyzTwYMHIZFIFLpPzi+Wk1evXuHGjRto3ry5skNRezxYIGOMMcYYYyxXGhoa0NbWVnYYrAzgXCs53BDAGGOMMcYYY4yVIWr9aACPSJrxbBdjjDHGGOO6YUFl1iMVWZ/kuqliqHMuF0feqauizKyltg0BPCIpU0WvX7/G0KFDcefOHdSvXx8hISHCsvT0dEyZMgUnTpyAlpYWqlWrht9++w0NGzYEACxbtgx//PEHdHR0UK5cOaxbtw5NmzbNdowdO3ZgwoQJqF+/PgCgatWqCAoKKpHzY6pFJBIhOjoaVapUUWocZ8+eRfv27bFq1SpMnDhRZtnjx4/h6OiIkSNH4pdffslx+3r16kFXVxfly5cHAMycORPe3t7FHDUrLGXn3c8//4w//vgDz58/h7+/P7p3757jeq9fv0aDBg1ga2srvHfw4EE0aNCghCJlJY3rhoWXOYAkU67MeuStW7eQlJQEqVSq7JCKFedd3ooys5baNgTwiKQZso7+qw6kUqlCRqwtik+fPqFatWqF2lZPTw+LFi1CbGwsZs+eLbPs6NGjuHz5Mu7evQttbW0sWrQIs2bNwv79+xESEoJNmzbh4cOHqFSpEnbt2oWxY8fi+vXrOR7HxcUFhw8fLlSMjGWKiYlB5cqVoampWeh9xMbGYsaMGfD09My2LDU1FSNHjkSPHj3y3M++fftgZ2dX6DhY6VHUvHN3d0e/fv0wfPjwPNetXLmyTIMsU46S+m7nuqFqUGbdVF3qkbdu3cLEiRM5l8u4os6spbYNAZl4RNLSTyQSYe7cuTh+/DjatWuHqVOnYvTo0Xj+/DmICOPGjcOoUaNw8uRJrFy5EidPnkRcXByqVauGjRs3YuTIkfjzzz9x7tw5/P7771i0aBF2794NXV1dAMCRI0dQt25duTE8efIEe/bswf79+zFixAhMnTq1UOdiYGCA1q1b49y5czmeZ3JysjA9T1xcHExMTIRlqampSEhIQKVKlRATEyMsYywvRIQZM2bg8ePH2Lt3LypUqJDruomJiQgICMCePXtw584dPHz4EBUrViz0sceOHYs5c+bA398/27IFCxagT58++Pz5M2JiYgp9DKaalJV3OfWUYqpHmd/tXDcsW9SxHvno0SMAnMusaNS+IYCpB01NTdy4cQMA4O3tjcaNG8Pf3x9RUVFwdHSERCKBs7Mz+vXrh+TkZAQFBcHJyQmnT5/GyJEjcerUKXTq1AnR0dFYuXIlIiIiUL58eXz9+hUaGjmPmfnmzRvs3bsXe/fuhY6ODvr164dz587B2NgYABAfHw9nZ+cctzUyMsK///5boHPs0qULgoKCYGxsjMqVK6N27do4f/48AEAikWDSpEmoX78+DAwMoKuriwsXLuS6r0uXLsHOzg4VKlTApEmT0KdPnwLFwtRHcnIy+vfvj2rVquHQoUM53mVNS0vD6dOnsWfPHly4cAHt27fHuHHj0K5dO+H6WLFiBXbv3p3jMebNm5fjXf2///4bGhoa6Nq1a7aGgODgYFy9ehWnTp3CTz/9lOd5DBkyBESEpk2bYunSpahevXp+Tp8piTLzriASEhLg5OSEtLQ0dO/eHbNnzy5SDxhWMMr4bmdlU1moRzJWYKSmbt26RQDo1q1byg5FqdThcwBA4eHhwmsDAwN69eqV8Hr8+PG0cOFCIiJycXGhM2fO0NixY+nIkSPUoEE
DSktLo1q1alFERARJpVJq0qQJ9ezZk/z8/GT2m9XBgwdJJBLRgAED6PXr1wo/p6CgIJJIJDLvBQcHk5ubG0VHR1NaWhpNnTqVBg4cSERE//33HzVv3pzevn1LRETr16+nVq1a5bjvDx8+UEJCAhERPXr0iExMTOjq1asKP4eSUtQcVodroLAAkKOjIy1YsEDuehKJhIyMjGjv3r2UkpKikGNHRESQRCKh6OhoIiLy8fGhNWvWEBFRQkICOTg40H///UdERPPmzaMJEybkuq/Q0FAiIkpJSaFp06ZRp06dFBKjopXlXMtKmXmXVdu2benQoUO5Lk9KSqL3798TEdGnT5/I3d2dli1bpvA4SpvMPC7uXFbGd/uKFSv4GlUBJV1WqmM9cvPmzZzLrMjXEjeXslKhUqVKuS4TiUTC/93d3XH69GlcuHABbm5usLW1xa5du1C1alUYGxtDU1MT165dw8SJExEVFYXmzZvj4sWL2fbZvn17/Pbbb4iMjETnzp2xYMECPHv2TGad+Ph42NnZ5fivQ4cOBT7HP//8E66urqhSpQo0NDTg4+MjDPJ38OBB2NraolatWgCAYcOG4fLly0hJScm2H0NDQ6ELrqWlJTw9PXH58uUCx8PUg6urK06dOoW4uLhc19m6dSv69euHWbNmYeDAgTh06BCSk5Nl1lmxYkWu+X7o0KFs+7x16xYiIiJgZ2eHevXq4e+//8aCBQswe/ZsvHz5EmFhYXBxcUG9evXwyy+/4Pfff4ePj0+O8WU+96atrY2JEyfmeM0y1aKsvCsIXV1d1KhRA0BGd9vhw4dzbpWwkv5ub9asWbGcB1N9ZaEeyViBKbhhQmWo252ZTp060ZMnT/Jc78cff6Rdu3YJr9XhcwAg3FUkIurbty/NmjWLiIiioqKoTp06dO3aNSLKuKtet25datOmDRERbdiwgerWrSvcbYyLixPuqhMReXt709q1a+Ue/927d7R69WpycnIiR0dHOnbsWJHPKaceAatWrSI3NzdKTk4mIqKlS5eSh4cHEWW0LFtZWVF8fDwREe3du5fMzc1z3PebN2+E/0dGRlLDhg3pzJkzRY5ZWbhHQOFlXjtr1qwhJycn+vjxo9z109LS6NSpUzRs2DAyNTWlIUOGUFJSkkJiydoj4FvyegR8+fJF5vpftWoVOTs7KyQmRSvLuZaVquRdXj0C3r9/L/RESEpKot69e9OPP/5Y5OOWdiXZI6Ckv9vL8jVa2HpkUV27do3EYjE1atSIXFxc6M2bN0rpEaBu9UhV7hGgSrmWm4ULF5KZmRmZmZkJuUBEdP36dWrRogWVL1+eunXrJrPN9u3bSU9PjyQSCUkkEmrXrp3CYi+sol5L3BCg5tThc/i2AI+MjKQePXqQjY0NWVtbk5+fn7AsLS2NqlSpQj/99BMRET19+pQA0NGjR4mIKDw8nJo1a0Y2NjZka2tLPXv2pJiYmHzH8uzZM7p48WKhzyUhIYFq165NhoaGpK2tTbVr16YZM2YQUUZF9LvvviMLCwuytbWl9u3b08uXL4mIKD09nWbMmEGNGzcmsVhMLVq0oJs3bwr77dSpE924cYOIiGbOnElWVlYkkUjI1taWNm7cWOh4VQE3BBRe1mtny5YtJBaLKSIiIl/bJiUl0cGDBykxMVEhsRSkIeDIkSM0YsQIIiJ6+fIl2dnZka2tLdnY2FDXrl1lunSqkrKca1kpO+8WLlxItWvXJh0dHapWrRrVrl2boqKiiCij4vnrr78SUUYDq7W1NYnFYrKysqKxY8cqrOGrNFNWQ0BJfLfzNVqy0tLSqEGDBnT27Fkiyng0o3fv3kpvCFCHemSVKlU4l7PILddycv78ebKysqIvX75QUlISOTo6UkBAABFl/H2Dg4PJz88vx4aAb99TNm4IyEVpLOwPHz5MFhYWJBaLadq0aVStWjWhwlu3bl26c+cOEWXc5fjf//5HrVu3JjMzMxo1apSwj28r26Xxc2AsK24
IYKUF5xpTByXVEKAM6n6NFkc9siiuX79OjRs3Fl7HxcWRrq4uXblyRa3/DiVB2blcWnItp0ZlX19fWrJkifB648aNwphcmXL60a+ODQE8a4CKiIqKwvDhw3H58mVYWFhg+/bt+PTpU67rv3z5EkFBQUhNTYWVlRWuXr2KFi1alGDEjDHGGGNMFZREPdLb2xtPnz7NcdmxY8dQp04dmffCwsJkptWrXLky9PT08OHDhwKcGVM1pSnX3r17BzMzs2zrtm7dWnhdr1497N27V248mdRtVi5uCFAR165dg1gshoWFBQDAx8cHo0ePznV9b29vaGlpQUtLC3Z2dnj58iU3BDDGGGOMlUElUY/ct2+fQmNmpVNZzbXOnTujb9++qFChAh4/fgwPDw/UqVMHzZs3V3ZohcazBpRS5cqVE/6vqakJqVSqxGhUk6enZ66tiVnNnTs313mqCyM4OBgSiQTm5uZwdXXF27dvc1xvx44d0NfXF0aIdXFxEZbt3bsXdnZ2sLGxgY2NDVatWiUsS09Px5QpU2BjYwMLCwuMGDEix9kDGANU/zqQl+vnzp1D+fLlZUZSTkxMBMDXgSpS9VzLlJiYCCsrK9jZ2Qnvbd++XSbPDA0N0bNnTwDAly9f0KFDBxgaGqJKlSoKi5sVnrJyTZ0Uph7p7e2d6yj34eHh2dY3NTVFaGio8Do+Ph6xsbGoXr26Yk6iBJSWck2VKTPXMmfbkrfu69evhdmJ5FHHWbm4R4CKaN68Oe7du4enT5+icePG2LVrF1dqi+j48eP5Wm/BggUKO2Z6ejoGDhyI3377DS4uLli5ciUmTpyIAwcO5Li+i4sLDh8+nO39OnXq4MSJEzA2NkZsbCwcHR3h6OiIdu3aYdu2bbh9+zZu374NbW1tjBw5EmvXrsXUqVMVdh5Mfaj6dSAv1wGgcePGCAkJybYdXweqR9VzLdP06dPRqlUr3LhxQ3hv2LBhGDZsmPDaxsYGAwcOBJAxbeX06dNhYGAg5CVTLmXkmqoriXpkQe/SOjo6IjU1FUFBQXBxccHmzZvRpUsX6OrqKjSu4lRayrWSVJpyLWsjRKY+ffpgzJgxGDduHLS0tPD7779j/vz5eR7j7du3qF27NgDg/fv3OHv2LLy9vQsUp6rhHgEqokaNGti6dSu6d+8OOzs73L9/H5UqVeK7D3k4cuQILC0tIZFIMH36dBgaGuL169cAMp75yfwB0a5dO0yZMgXOzs5o0KCBTBemoUOH4pdfflFIPLdu3YKWlpZwd3/UqFE4duwYkpKSCrSfVq1awdjYGACgr68PCwsL4bzu3r0Ld3d36OjoQCQSoVOnTti5c6dC4melU2m+DuTlujx8HShHac41ADh9+jTevn0r/MjPSXBwMKKiotC1a1cAgK6uLlxdXfn7uISpWq6pOlWsR2poaGDXrl2YMGECzM3NERAQgDVr1igtntyoWq4pqi5ZXEpjrnl6euLmzZsAMv6O3t7esLW1haWlJdq3b4/OnTsDAJ4+fQoTExNMnjwZ//77L0xMTLBp0yYAwMaNG2FtbQ07Ozu0b98ekyZNgqura8mfrAJxjwAV4u7ujh49egAADh8+jICAAOGiyloxPnfunMx2f//9t/D/HTt2FHOUqqO0D1YC5G/QkUePHuHq1avw8/MDkNHquXnzZowdOxbly5fH/v378/XDiakndbgOMn2b65nxOjg4QFNTE8OGDYOvry8Avg6UobTnWkxMDKZNm4YTJ07g0aNHucawbds2DB48GNra2nJjZcWHB1AuHFWsR7Zo0QL37t2TeU+VBgss7eWaspSWXMv0bc+OuXPnYu7cudnWa9y4Md68eZPjPn7++Wf8/PPPRQ9UhXBDgApZv3499u3bh7S0NOjp6fFzbXko7YOV5GfQkTdv3qBbt27w8/ODiYkJgIxW59DQULRt2xbly5eHu7s7Tp48WWxxMtVW2q+DTDnluoODA968eQN9fX28efMGnp6eMDQ0RN+
+ffk6UILSnmtjx47FrFmzUKNGjVwbAhISErB3715cu3at2OJgeeMBlAuH65EFV9rLNWXhXFMP3BCgQmbNmoVZs2YpOwy1VdjBSgrSiluQwUoMDQ2F/2cddCSzIeDdu3dwd3fHnDlzZHoKiEQizJ8/X3ieae/evbC2ts7zXBgDVO86AHLPdT09PeH/JiYm6N+/Py5evIi+ffvydVAKqFquXbp0CZcuXcKUKVOQlJSEz58/o3HjxjLHO3DgAKytrWFlZZVnrEx18ADKGbgeWfxUrVxTFs419cANAazUKu2DlcgbdCQiIgJubm6YPn06fHx8ZLZLSkpCYmIiqlatio8fP2Lp0qVYuHBhIc+QlXal/TqQl+sREREwMjKChoYG4uPjERAQgBEjRgDg60AZSnuufdtddeLEidkGoty2bZuQY0x5eABl1TN//nzExMSo3ZgLpb1cUwfKzq2rV6/ihx9+AACkpqaidevWWLduHXR1dXH27FnMmDEDX758gUgkgpeXF5YuXQoNDQ3cv38fY8aMQVRUFLS0tNC0aVNs3LgR5cuXB5Bx487GxgaampoAMnpRODs7K+Ucc8ODBbJSq7QPViJv0JG5c+ciLCwMa9euFaZL2b59OwAgNjYWLVu2hLW1NZydnTF69Gh06dKl5E+WqYTSfh3Iy/WDBw/C1tYWEokEzZs3R/v27YWR3fk6KHmlPdfy8vTpU4SEhOQ4CrRYLEaLFi0QFxcHExMTDB48WGHnwLJTxVxj6kkVcy0/5Zq8cU5YwUgkEty4cQMhISG4f/8+oqKihAECq1atir179+LRo0e4desWrly5gj///BNARu+QDRs24MmTJ7h79y4SEhKwbNkymX1fvHgRISEhCAkJUblGAAAAqalbt24RALp165ayQyEioq9fv1Lfvn3J0tKSxGIxtW/fnoiIIiIiqF27duTg4EBWVlY0ZswYSktLIyKi7du3k5ubG/Xr148sLS2pRYsW9PDhQ+revTtZWFhQ+/btKT4+noiI5s2bRz179iQXFxdq3Lgxde7cmT5+/Khyn4OixcXFCf8/dOgQWVhYKDEaVhyKmsPqfg0Q8XWgKjjXmDrIzGNl53Jx5Fppv0aVVZfMXDZhwgQhlhUrVpCTkxPZ29tThw4d6PXr10REdPToUbK1tSWJRELW1tZ0+PDhbOehan+H0liuKfozVJfcKqrExETq0KEDrVmzJsflY8aMoXnz5uW4bMWKFeTj4yO8BkDR0dEKjzGrouYBPxpQQk6cOIGYmBihBe/z588AgCpVquDYsWOoVKkS0tLS0K1bN+zfvx/9+vUDANy4cQP379+HqakpBg8ejC5duuDKlSswMjJC586d8ccff2DMmDEAMlqd7t27B2NjY/j6+mLmzJlyBzxRBzxYCWN8HbCSw7nGSgrnWnbKqktu2bJFJo49e/bg6dOnuHr1KjQ1NbFz5074+voiMDAQc+bMwebNm9GiRQukp6cjLi4u23kkJCQU58dUYJxr6pNb8fHxud55NzIywr///pvjstevX6Nbt254+fIlvLy8hBmKsoqMjMTff/+NgICAbMsSEhKwdetWLFmyROZ9Nzc3SKVSuLm5YeHChahYsWKOx1cWbggoIRKJBI8fP4avry/atm0LT09PAEB6ejqmT5+OS5cugYgQFRUFGxsb4QJr0aIFTE1NAQBNmjRBamoqjIyMAABOTk54/vy5cAwvLy9hPu6RI0eiZ8+eat8QwIOVMMbXASs5nGuspHCuZaesuuS3Dh8+jBs3bsDR0REAkJaWJixzc3PDhAkT0Lt3b3h4eMDOzi7b9qr2Y4hzTX1yq3LlytnGfsmPevXq4e7du/jy5QsGDRoEf39/4RwBIC4uDl26dMG0adPQpEkTmW1TUlLg7e0NDw8PYUpFAAgNDYWpqSkSEhIwevRoTJ06VXjkQFXwGAElxMzMDI8ePULHjh1x+fJl2NjYIDo6GqtXr0ZUVBSCg4Nx7949DBgwAElJScJ
2345OWpDRSkUiUfGcDGOMMcYYK1GqUpckIsycOVN49vn+/fu4f/8+AGD16tXYvn07KlSoAB8fHyxfvjzb9qrWI4CpT27Fx8cL4w19+69Dhw55fg6VKlVCv379ZHqFxMfHo2PHjujWrRsmT54ss35qaiq8vb1Rs2ZNrF27VmZZZgNJxYoV4evri4sXL+Z5/JLGDQEl5M2bNxCJROjatStWrlwJIkJ4eDiio6NhbGyMcuXKITIyEgcOHCj0MY4fP473798DALZu3Qp3d3dFhV8mzJ8/HxMnTlTa8V+/fo127dpBX18/WyvnuXPnUL58eZkCLTExEUBGa+3kyZNhZWUFsVgMFxcXvHjxQth22bJlsLKygp2dHZo3b47r16+X5GmxUkbZ18HVq1eFHLe2tsaoUaOQnJwMQH6u379/H23atIGFhQVsbGwwfPhw4Rp59+4dOnTogMaNG0MsFqNXr1748OGD0s6RZafsvEtPT8eUKVNgY2MDCwsLjBgxIseRw4cOHQqRSISYmBjhvT/++AO2traws7ODvb09jh8/XoKRs7JEVeqS3bt3h5+fn9B9PDU1FXfu3AEAPHnyBNbW1hg7dix++OEHXLt2Ldv2qtYjQJGUXZYVlrrkVmaPgJz+5fZYwIsXL5Camgog4+7+oUOHIBaLAQBfvnxBx44d0bFjR8yZM0dmO6lUin79+sHAwABbtmyRadiIjo7G169fAWR8v+zbtw/29vYF+rxKAj8aUELu37+PmTNngogglUoxePBgiMVioYuLtbU1atWqVaQf787OzhgwYADevn2LRo0aYceOHTLzkDLVpqenh0WLFiE2NhazZ8/Otrxx48Y5dnc6evQoLl++jLt370JbWxuLFi3CrFmzsH//foSEhGDTpk14+PAhKlWqhF27dmHs2LHcGMBUVubovdra2khPT0evXr2wadMmTJo0SW6uZ47eKxaLkZaWhgEDBmDZsmWYP38+NDU18eOPP6J169YAgKlTp2Lq1KnYsWOHck+WqYxt27bh9u3buH37NrS1tTFy5EisXbsWU6dOFdbx9/eHtra2zHafP3/GuHHj8OzZMxgbG+PSpUvo2bMnoqKiSvoUWBmgrLrktwYOHIhPnz7BxcUFQMYPouHDh8Pe3h6zZs3C06dPoaOjgwoVKuDXX38tdCys5JTl3Dp79izWrVsn9GBwc3PDjz/+CABYu3Ytrl+/joSEBPj7+wMA+vTpg9mzZ2Pfvn3w9/eHWCwWfuS3atUKGzduxJMnTzBq1CiIRCJIpVI4ODhk6zGgEhQxYqEqUrURSYvbtyNuZiptnwOPWkoUFBREEokkz/cyHT58mCQSCcXFxVF6ejpNnTqVJk2aREREISEhVLNmTYqMjCQiovXr11OPHj0UGm9xK4uzBvB1kOHb0Xvl5fq3vh29N6sDBw5Q27ZtFR5vacy1rMpy3o0ZM4YWL14svD548CDZ2toKryMjI8nR0ZHi4uJkRoL+8OEDVapUiZ49e0ZERMeOHSN7e3uFxKQsJTFrAM+kpJpyq0sqWnH/HcpCWVbacrmkcqus4VkDmFop66OWyvPy5Us4ODhAU1MTw4YNE0Y07dKlC4KCgmBsbIzKlSujdu3aOH/+PICMu6uTJk1C/fr1YWBgAF1dXVy4cKHAx2Ylq6xfB7mN3isv17PKbfReIGPgoQ0bNqBbt245HrssK8t55+joiM2bN2Ps2LEoX7489u/fj9evXwvLv//+eyxfvhyVK1eW2c7Q0BB+fn5wcHCAgYEBEhMTcfr06Xx+4mUXz6TEilNZKMvKly+vsM+LlV3cEKAm5s+fr+wQFKKsj1qaGwcHB7x58wb6+vp48+YNPD09YWhoiL59++LmzZt48OAB3r59Cz09PcyYMQOjR4/Grl278OrVK/j7++PFixeoVasWNmzYAG9vb1y6dElhsTHFK+vXQW6j98rL9Uy5jd4LZAxC5Ovri6pVq2LChAkFjkvdleW8Gzp0KEJDQ9G2bVu
UL18e7u7uOHnyJICMZ1lNTU3h6uqabbvY2Fih66ilpSWOHTuGHj164PHjx9DR0SlQDGUJz6SkmrguWXrKstu3bwv7LQ3UJbfUDTcEMJWSOWrp2bNncfr0aUybNg0hISHYuHGjMGppuXLlMHny5BIZtXTkyJHZlq1evRoPHz5EUFAQfHx8MHDgQEybNk1mHUX3CNDT0xP+b2Jigv79++PixYvo27cv/vzzT7i6uqJKlSoAAB8fH3h4eAAADh48CFtbW9SqVQsAMGzYMIwbNw4pKSlcSVVhfB1kyDp6b79+/eTmOiB/9F4AGD9+PMLDw3H48GFoaPBYud8qy3knEokwf/58obK6d+9eWFtbAwCCgoJw4cIFmbmjxWIxjhw5gpcvX6JKlSqwtLQEkNFrZfjw4QgNDUWjRo1yPe+yTlVyjaknVcmv4izLuEcAUwRuCGAq5c2bN6hatSq6du2Kjh074vDhw7mOWtqrV69CHSNz1FIjIyO5o5auWrUKvXv3hoGBAVJTU/HgwQPY29sLo5ZaW1tDS0tLuGuUlaJ7BERERMDIyAgaGhqIj49HQEAARowYASDjC+/48eOYMmUKdHR0EBAQABsbG2HZ9u3b8eXLF1SqVAkBAQEwNzfnRgAVV5avgxcvXqBu3brQ1tbONnqvvFyXN3ovkNEI8OLFCxw+fJjzPxdlOe+SkpKQmJiIqlWr4uPHj1i6dCkWLlwIADLTSAEZFf579+6hSpUqICKEhIQgMjISxsbGuHr1KqRSKerUqVOg45c1qpJrTD2pSn4VZ1lW2noEMNXEDQElZP78+YiJicEvv/yi7FBUWlketfTr168wNzdHcnIyYmNjYWJigsGDB2PJkiU4ePAgfv31V2hpaUEqlaJPnz4YNmwYAGDMmDF4/PgxJBIJtLW1YWxsDD8/PwBAjx49cOPGDTRp0gS6urqoWLEi9uzZo5B4WfEpy9eBvNF75eW6vNF7L1++jPXr18PCwgLNmjUDANSvXx+HDh1SSMzqoiznXWxsLNq1awcNDQ2kp6djwoQJ6NKlS57bOTg4YPbs2XB1dYW2tja0tLSEWSxY7ngmpcJRlbqkSCSCjY0Nli1bJnS7X7RoEbZv3w4A6NevHxYvXqy0+MpyWVZYqppbe/fuxdKlS4XeGMOGDcP//vc/mW2ICG5ubrh9+7bM1K65efXqFXr37o20tDRIpVJYWlpiy5YtqFq1Ku7fv48xY8YgKioKWlpaaNq0KTZu3Cj0wAgODsbIkSORmJgIExMT7Ny5E7Vr1wYA9O7dG1euXEFERASio6OF3ot5yW2fiYmJaNGiBV68eIFdu3ahe/fu+f4c861oYxWqLlUbTVNZo2Wq2uegbDxqaelTFmcNKG58HRQPzjX5OO9Kh5KYNaC4qctMSt9SlWsIWWbOICI6f/48WVlZ0ZcvXygpKYkcHR0pICAg1+3571B0iv4MVeGciLLn1qVLlygiIoKIiGJiYqhBgwYUFBQks82qVavou+++I319/XwdIykpib5+/Sq8Hj9+PI0fP56IiJ49e0Z3794lIiKpVEp9+/alefPmERFRWloaNWjQgM6ePUtEGTNC9O7dW9jPqVOn6P3799nOQZ689klE1LZtWzp06FCO2xc1D/ghyQJavHgxxo4dK7z+8uULDAwM8OHDB9y/fx+tW7eGg4MDrKyssGjRohz3sWPHDplWnYCAALRr1054vXPnTjRr1gwODg5o06YN7t69W1ynwxhjjDHGSpC61SX37duHwYMHo2LFitDV1cXw4cPx119/FdvxWO7ULbdatWolDMqor68PCwsLmRldHj58iMOHD2PGjBn53qeurq5whz8tLQ0JCQnC44SNGjUSHkfU1NSEk5OTcLxbt25BS0tL6OExatQoHDt2TBhnwt3dHTVq1CjQ+eW1z+LGjwYU0JAhQ+Do6IhVq1ZBV1cXBw4cgIuLC6pXr45y5crhzJkz0NXVRWJiIlq2bAl3d3c0b9483/u/fPk
y/vrrL1y4cAG6urq4ePEiBgwYgIcPH2Zbd9KkSQgKCspxP5s3bxa6wLL/w6OWMsbXAVMOzjtWUlQ919StLhkWFobWrVsLr+vVq4e9e/fmO97SRpXzS91yK6tHjx7h6tWrwiOBqamp+P7777Ft2zZoamoWaF8pKSlo2rQpQkNDIRaLcfTo0WzrfDsVcVhYGOrWrSssr1y5MvT09PDu3TuYmZkV6PiZimOfBcENAQVUp04d2Nvb4+jRo+jTpw927NiBqVOnAgASExPh6+uLkJAQaGhoIDw8HCEhIQW6wI4cOYK7d+/KXByfP39GYmJithFC16xZo5iTYowxxhhjJYLrkqy4qGtuvXnzBt26dYOfnx9MTEwAAD/99BN69uwJS0tLmV4C+aGjo4OQkBCkpKRg3Lhx2Lx5s8ysDfKmIlYn3BBQCMOHD8f27dvh6OiIFy9eoGPHjgCAWbNmwdDQEHfu3IGWlhZ69uyZY9cOLS0tmblEs65DRPDx8cHPP/+cZxzcI4AxxhhjrPRRp7qkqampzECMr1+/hqmpaZ7bseKhTrkFAO/evYO7uzvmzJmDPn36CO+fP38eYWFh2LBhA6RSKeLi4lCvXj3cuHED1atXz9e+dXR0MGzYMHz//fdCQ0BuUxF/m+fx8fGIjY0VpugujOLYZ0HwGAGF0L17d9y4cQNLlizBoEGDoKWV0Z4SHR0NExMTaGlp4enTpzh16lSO2zds2BD37t1DYmIipFKpzCjuXbt2xa5duxAWFgYASE9Px82bN3Pcz5o1axASEpLjv9LSCDB//nxMnDhR2WFAJBLB1tYWx48fF947f/48nJycYG1tDSsrK1y9ehUAhOnM7OzsYGVlhdmzZ4OI8nWc3PYpb9mNGzfQsmVLVKhQoUAjhiYmJmLIkCGwsbGBjY0Nunbtig8fPgjL79+/j3bt2sHS0hKWlpbw9/eX2Z6IZOZsB4CXL1/Czs5OaEllhaOqeb948WLY2dkJ//T09DB58mQAwLlz51C+fHmZ5YmJifk+Vk75BAArVqyAjY0NrKys0KNHD5kRf6OjozFw4ECYm5vD2to6X88AJiQkoFmzZpBIJJBIJOjYsaNwp+Ddu3fo0KEDGjduDLFYjF69eslcE/Xq1UPjxo2F89u3b5+wbPz48ahXrx5EIlG23HdxcYGBgUG2aeZUkarmXl7l6qJFi9CgQQM0aNAAs2fPFt4vbHm8d+9e2NnZCeXjqlWrZJbnVT7mJrftXr9+jXbt2kFfXx92dnYy26Snp2PKlCmwsbGBhYUFRowYgZSUFADqU+aqSt6p0nRr6lSX7NOnD3bu3ImEhAQkJyfj999/R79+/QAA169fh5ubW74/l8JQlfz6tlyTV85cvXpV+K6xtrbGqFGjkJycnOey/FCn3IqIiICbmxumT58OHx8fmWUXL15EaGgoXr9+jUuXLkFPTw+vX78WGgEsLCzw9u3bbPsMDQ3F169fhfgPHDggjAsgbypiR0dHpKamCo0bmzdvRpcuXfI1S4ybmxuuX7+e7f2i7FMhCjXEYClQ3COS+vr6EgB6/Pix8N7t27fJ2tqabG1tqXv37tS1a1das2YNEWUfjXP06NFkZmZGrVq1omnTplHbtm2FZX/99Rc5ODiQWCwmCwsL+t///lfoOFV9ZFZVHaX07du3VLduXXr06BERZYwwmrk8Li6O0tLSiIgoOTmZnJycyN/fP89jyNunvGXh4eEUHBxMfn5+1K1bt3yf05o1a6hXr16Unp5ORETfffcdTZ06lYiIEhISqH79+nTx4kUiyhgZNSoqSmZ7eaOw1q1bl+7cuZPvWIpCHWcNUNW8zyopKYkMDAzo5s2bREQUFBREEomk0MfKKZ9OnjxJlpaWFBcXR0RECxcuJF9fX2F59+7dacWKFcLrzJGD5UlLSxP2R0S0evVq6tq1KxERRUZGCjlPRDRlyhTy8fERXsvL6/Pnz1N4eHiu6/j4+NDkyZNVLte+paq5J69clTc
ieWHLY3kjUeenfMyJvO0+ffpEFy9epICAgGzX0ZYtW8jFxYWSk5MpPT2dvvvuO1q+fLnMOsoocxWZy6qUd6p0jZaWuuS3cvru+Omnn6h+/fpUv359mjFjhvD+vn37yMvLS2ZdHvE+o7xISUkhoozvre7du9Pq1avzXJYpr89QXXLru+++owoVKpBEIhH+/f7779m2e/XqlUz94v3792RoaCgzO0Cmo0ePkq2tLdna2pK1tTUNHjyYPn78SEREu3btIgAkFouF42Wtl1y5coVsbW2pUaNG1LZtWwoLCxOWeXp6Uu3atQkA1apVS/jMpFIpVatWjd6+fZvjOcvbJ1HxzhrADQFqrqQ+h0WLFtGYMWOE1/Hx8VS1alWKioqie/fuUatWrcje3p4sLS1p4cKFwnpZC57t27fL/NA9duyYTMHz559/UtOmTcne3p6cnZ0pJCREYfF/W/DMnj2bZs6cmed2sbGxJBaLc71As5K3z/wc79vPJy+//PILderUiZKTkyk1NZX69etHa9euJSKi3377jfr375/rtg8ePCBnZ2d68eIFNwTIoW55n9XevXtlfrAUpSEgt3xasWIFff/998LrW7duUeXKlYmI6Pnz52RiYiL8yCuM9PR0mj9/fq7XzYEDB2Q+6/zktao0BKhz7n1brvr6+tKSJUuE5Rs3bqSBAwfmuV1BeHl50fbt24ko7/IxN/nZLqfraMyYMbR48WLh9cGDB8nW1lZmHVVpCFCHvOO6YdHJu36/5evrK9MAS5T797I65Je8zyVrOZNVYmIidejQQfhRnp9l6vo7pyC5Jc/+/ftlckSZrl+/TiNGjCj09jx9IFN5Q4YMwf79+4WuS1lHKa1Xrx7OnDmD27dv49atWzh48CCuXbtWoP1nHaX09u3bWLx4MQYMGJDjupMmTZLpvpz1X3BwcL6O9+jRIyQmJsLd3R12dnYYN24cEhIShOVXrlyBra0tatSoAVdXV3Tr1q1I+8zreIUxatQoVK5cGTVq1ICRkRFiY2OFKWUePXoEXV1ddO7cGXZ2dhgyZIjQRTpzFNbNmzcXeBTWskbd8j6rbdu2YcSIETLvvXz5Eg4ODnBycsKmTZvytR95+eTo6IjTp08jMjISRITdu3cjPj4enz9/xqNHj2BiYoIffvgBjo6O8PDwwJ07d/Idv7u7O4yNjXHgwAFs3Lgx2/K0tDRs2LAh27U7ZMgQ2NraYsSIETKPDagadcy93MrVb0dVrlevntAtVd52+ZU5ErW7u7vwOrfyMa/9FGY7R0dHHD16FHFxcUhNTcX+/fsLPPBVSVHHvGMFZ2RkhLZt28o8TpmbjRs3yswoII8659e35QyQ8ciQRCKBoaEh9PX14evrm69l6qwguSVPnz59MGfOHAVFVTROTk7YunVrgbdLTEyEnZ0d/vvvv2J7VIAbAr5Rr149lXwOTyQSyTw7m2nHjh3CM4eZ/3J7Vqc4ZR2lNDOuYcOGAchI5O+++w62trZo3rw5QkNDC/wZZx2lNPOHcuYopd9SxDNJUqkUFy5cwIEDB3Djxg1ER0dj3rx5wvKWLVvi/v37CA8Pyh4qTgAA3qNJREFUx61bt3Dx4sUi7TOv4xXGyZMnkZ6ejsjISERERKBKlSqYO3eucLzTp09j8+bNuHPnDmrXro0ffvgBgOworEw+dcv7TKGhobh06RIGDhwovOfg4IA3b97g9u3bOHToEPz8/LB///489yUvn1xcXDBlyhR07twZzZs3F57r09LSglQqxfXr19GvXz/cunULkyZNQufOnZGampqvczh9+jQiIiLg7e2NxYsXyywjIvj6+qJq1aqYMGGC8P6FCxdw79493L59G4aGhtmeR1Ql6ph7hSlXi7IdkPNI1PLKR3kKu93QoUPRsWNHtG3bFm3btoW5ubnwTK+qUce8y4/SVDdMTk5G+fLl8ebNG+E9d3d3mXngIyMjUa5cuQKN85JVZGQk7t69C09PT+G9oUOHonbt2rC
3t0ejRo3QunVr7Ny5U1h+8+ZNeHt7y92vuuZXTuUMkJFXd+/eRWRkJJKTk2XGIpG3rKhUOZ+fPHmSLbey/taxsbGBi4sLnj17VuD9f/nyReaZfzs7O8THx+e5TYcOHWBoaJhtjCN5y86dO5dtPJhMWa+FmJgYLF26NMf1ypcvj5CQEISFhQkDPiqaan7TsAJxcXHB4cOHlR2GWo1SampqCjs7O1StWhUA0L9/f2Ee0ayqV68OT09PHDhwAG3atCn0PvN7vILYsmULBgwYILQiDhw4UPj8TE1N4eLigtq1awMABg0ahA4dOgBQzCisZYk65X2m7du3o1u3bjAwMBDe09PTE/5vYmKC/v374+LFi+jbt6/cfeWVT76+vsKdjmvXrsHExAR6enowNTVF7dq14eLiAgDo1KkTUlJSEBoaioYNG+brPDQ0NPD999+jUaNGMj0Yxo8fj/DwcBw+fBgaGv/XHp45yrW2tjYmTpwIc3PzfB1HWdQx94Ds5Wp+RyQvSHkM5D4StbzyUZ7CbicSiTB//nxhbvK9e/fC2to6z+2URV3zTl3o6uqiRYsWOHfuHAYNGoSUlBS8evUKWlpaSEpKQrly5RAUFIRmzZplm+5NnrS0tDx7CU6dOlUYtC8kJATe3t748OEDJk+ejCZNmsgMwJobdcuv3MqZrCpVqoR+/fph9+7dwiCL+VlWlmT9rTNlyhRMnDixyL0G8tMYoq2tjenTp8PAwECmMS2vZfJkvRYyGwLyMxhycSizPQKuXr2K1q1bQyKRQCwW48iRI9nWWb16NZycnGBnZwcnJydhFPf09HSMHTsWlpaWkEgkcHR0RFJSEj58+AAPDw/Y2tpCLBYLrZiKRESYPn06unbtKox4+S2pVIoOHTqgSZMm6N27t8JjyI06jVI6YMAABAUFCd3T/vnnH0gkEgDAkydPkJ6eDiBjmo/AwEBhtFF5o+PK26e8ZfLIO56ZmRlOnjwJyhgLBIGBgbCxsQEA9O3bFzdu3EBcXBwA4Pjx48Lx8hqFlclSp7zPPMb27duzPRYQEREhk/cBAQGwt7cHALx9+xYWFhY57i+vfIqIiAAAfP36FXPnzhWm73F0dISenh7u3bsHICPXiQh16tQBkPsIvJGRkYiOjhZe79u3T7g+gYxGgBcvXuDQoUPQ0dER3k9ISJC5s/bXX38J56eq1Cn35JWr8kYkL2x5LG8kannlo7xcl7edPElJSULOfvz4EUuXLpWZz1rVqFPefUtd6oYuLi44d+4cACA4OBhOTk5o1qyZ0JX+3LlzcHFxQWRkJFxcXODo6Ahra2uMHTtWuJ527NgBFxcX9OrVC7a2trh+/TratWuHcePGwcnJCQ0bNsT//ve/XGfpsLOzw9q1a7Fs2TIQkcwd0qxl9LfUKb/klTMvXrwQerilpKQIM6DktUxeGfQtdcnnb7m5uck0Dg8cOBBNmjSBWCyGl5cXIiMjhWWbN29Go0aNYG9vjzVr1sjsJ7NHzYcPH1CvXj3h+vj7778hkUiQmJgIXV3dHGc8AiB3GZDxeyxz9i5HR0eh4SHrtTB69GjEx8fDzs4OTZo0AZAxS46lpaXQ0zvruSpamewR8PnzZ3Tv3h1///03nJ2dkZ6enmO3+8GDBwtTZ127dg1Dhw4VuqycOXMGDx8+hIaGBmJjY6Gjo4Ndu3ahfv36OHnypHCcnBS2lTE5ORn9+/dHtWrVcOjQIaFlNigoSEgoIyMjnDhxAnv27EG1atVw69YtIbGKm66uLvr27YtNmzbh8ePHwvtz5szB4MGD8ccff6BBgwZwdXXNcfvmzZvD09MTNjY2qFmzJlq1aiU8h+Xs7Izly5ejR48ekEqlSElJgZeXV7GdW8uWLdG1a1fY29tDU1MT1tbW8PPzA5Dxw2Lfvn3Q1tZGWloaevfuje+++w5Axp2q3FrY5e1T3rKnT5/Czc0NX79+RWJiIkxMTDBr1iz4+vrKPd78+fMxcuRI4ce/hYUFNm/
eDCDjztWsWbPQsmVLaGhooHbt2tiyZYviPsAyRJ3yHsjoUq+hoZHtB9TBgwfx66+/Ct32+/TpI1QA3r59W+iuzB4eHkhPT0dKSgoGDx4sjGMhEonwxx9/4Pvvvxe+jA8ePAhdXV2kpaXh7t27Ml0sM4WFhWHUqFFIS0sDEaFBgwbYtWsXgIznQ9evXw8LCwuhnK1fvz4OHTqE9+/fo1evXsJ2ZmZm+PPPP4X9jho1CoGBgYiMjESHDh1QuXJlvHjxolDnrCjqlHvyytV27drB29sbtra2AABvb2907tw5z+3klY9z585FWFgY1q5dK8wTPWHCBAwbNkxu+Sgv1+Vt9/XrV5ibmyM5ORmxsbEwMTHB4MGDsWTJEsTGxqJdu3bQ0NBAeno6JkyYgC5duijok1U8dcq7rNSpbuji4iL88AwKCkK7du2gra0t/D8oKAhbt25FlSpVcOzYMVSqVAlpaWno1q0b9u/fLzS0BQcH486dO2jcuLFwvEePHuHKlStITU1FmzZt8Ndff+X6nH2zZs0QFRWVbawMeXdy1Sm/5JUzZ8+exbp166CpqQmpVAo3Nzf8+OOPACB3WX6/b9Upn7NKT0/HoUOHZHpH/PLLL8LNhaVLl2L+/Pnw8/PDgwcPMG/ePNy5cwc1a9bErFmzcjxm9erVsXPnTgwcOBB//fUXJk6ciLNnzxaox0xOHj58iLVr1+LPP/8UrqusOQ0Afn5+sLOzExoJoqOjsXLlSkRERKB8+fL4+vWrTM9FhSvUEIOlgLxRFAMCAsjZ2TnH7bKOzPvvv/9SmzZtyNramiQSCQGgr1+/UkxMDDVs2JB8fHxox44d9OHDByLKmP6hTp06NHnyZDp8+DAlJSUp7HwAkKOjIy1YsEDm/ZxGkk9LS6PZs2eTnZ0dNWjQQC1HFVU0KGiU0pxGxy1OJX28TDxrgHpQVN4vX76cdu7cWfSA8qmoI/AWh9IyfaCqUFTuyVMc5WNJ53omVZk1oLSTd17qVDdMTk6mChUqUFhYGLm4uNDjx4/p+fPn1KZNG3r79i1VqFCBkpOTKSEhgXx9fUksFpOtrS0ZGRnR9OnTiSijfunm5iaz37Zt29KOHTuE12vWrKFhw4YRUUYZ+O3I9p8+fSIA9P79e5kZM7Zv3662+VXc5VpmGZRX3Uad8nn79u2kp6dHEomEqlatSg0aNBCm+yPKmCXL0dGRrK2tqUGDBtSsWTMiIlq7di0NHTpUWC8sLIyy/vT99u+1ePFi0tTUpN27d2eL7dvpCfNaFhQURPXq1ZN5T19fn0JDQ2WuhW+3lUql1KRJE+rZsyf5+flReHh4jsfMxLMGFJOUlBT07NkTK1euxIMHD3DhwgUAGS1V+vr6ePDgAQYMGIAnT55ALBbjxYsXaNGihdBlyN/fH05OTjLPKWUq7Eikrq6uOHXqlNDtMDd79uzB2bNncf78+XwN5sUUN0ppQUbHVYSSPt7Lly9hZ2eH1NRUaGtrl9hxWfFQVN5PnToVgwYNUlBUeSvsCLzFxcXFBefPny/y3YOyRFG5J09xlI8lnetc5ipW1nFPCqO01A11dHTQqlUrnDhxAq9fv4aFhQUaNmyI8PBwHD9+HC1btoSOjg5Wr16NqKgoBAcH4969exgwYIDMM/WVKlX6f+zdd1gUV/s38O9SFQUsqGgQDYr0pVmwoKCIymLDKPaWR00sscSKiSUx0VjjozGaaDRGjTHWyKJRFAsqBgUVFY36iCiCWEBA6sJ5/+BlfiAddndmZ+/PdXldwu7O3HP4ztnZszNnKm2T4pOvvS8yMhJNmzZF06ZNS/y++OVaYqKOfk2ZfZCm5BkofJ+9ceMGnj17hnbt2nFzC4WHh+O///0vQkJCcPv2baxfv77MuSOAirMKANHR0WjSpAmePn1a4fNqSiKRVFqDrq4uIiIiMGvWLCQnJ8PDw6NaE+BWl1YOBHTp0gUPHjzgGragoKDUqS3Z2dnIzc3lJiTatGkT99jLly/
x7t07+Pr64ttvv0Xr1q1x9+5dPH78GPXr18ewYcOwadMm/Pvvv8jIyCi1/ppedxQUFISAgAD4+Pjg9evX5T4vJSUFZmZmMDExqdEt6IQ8m6iqZsctawbciihjdtwbN25g//79FT7nr7/+wuzZswFUPANpRSqakbQq2rdvz11rmJmZiUaNGsHMzAyBgYHo0KEDbt++DaCwTb7//vsar0dotGE/qG7u36et+0FMTAx69uwJZ2dnODo6IiMjA8ePH0dAQECNlq0NWXsf9bnl0/Q+V8h5rmjmdbEdG3p7e2PNmjXo2LEj9zsPDw+sXr2am4Q1JSUF5ubmqFOnDpKSkvDnn39W1IQAgD179iAvLw9ZWVnYt29fidvhFXfr1i3MmjULCxYsKPVYQkJCpeupCqFlrahfk8lkld7pq7az37u7u1f4HLHluYiRkRG2b9+OkJAQREdHIyUlBcbGxmjcuDFyc3O5S2CBwkGFkydPcnMGFF1yW5bNmzcjJSUFN2/exLZt23Dp0qVyn1tVcXFx3OURBw8eRLNmzUpd0mhiYoKsrCzk5uYCKJzr5sWLF/D09MSXX36Jbt26VevWydWllQMBDRs2xJEjR7Bw4UJIpVK4ubmV+oObmJhgxYoV6NixI9zd3UtMKvX06VP07t0bUqkUjo6OcHR0RL9+/XDu3Dm4u7vDxcUFXbp0wZo1a2BqaqrU2mfNmoVJkyahZ8+eJSbDKG7s2LHIzMyEjY0NZsyYodT1C03x2XEBcLPjJiYmciOCNZ0dtzLz5s1DdHQ0Hjx4gM2bN2PFihVYv349AFRpdtzKDkoVCgUGDBhQanKT6qrtQWlxI0aMwIwZM3Dz5k3cvn0bhw8fLjXST9SP9oPK0X6gHJS1ylHWNJPYjg29vb3x4MGDEoN0PXr0wIMHD7jr62fOnImrV6/CwcEBY8aMKfdDfXF2dnbo2rUrnJyc4OnpWeJa7TVr1sDFxYX7xnbRokXc9efFXb9+vZZbrLmKvtm+ffs23N3dubssKJvY8lxcixYtMHfuXCxZsgR9+/aFjY0NbGxs4OnpWWLw1tHREcuWLYOnpydcXV1haGhY5vqioqKwdu1a7N27F02bNsWePXswZswYbiBCKpWic+fOSEtL4+Z3KVLRYw4ODti1axecnJywcuVK/P7776XOCGjUqBHGjh0LqVSK9u3b4+3btwgICOAmY8zLy1PtrYxrdEGBBqDrggtV1A6XL19mXbt25a4NO3r0KGOs5LVD69atY+3bt2fOzs6sffv27PLly4yxwnkIpk2bxmxtbZlUKmVubm4sKyuLJScns969ezNHR0fm5ORU4tqc2sL/v5anoKCAzZ8/n/Xv35+9e/eOffXVV9z1whcuXGCBgYFszJgxLCwsjDHG2OTJk9nSpUtZYmIi8/LyYm5ubsze3p5NmzaN5efnM8YKrz/y8vJiAQEBzNHRkV2+fJn16NGDTZ8+nbVv3561adOGzZkzhxUUFDDGyr4W7sSJE6xp06asoKCgxPU/ZbXJixcvWMuWLblrnqZMmcJt45IlS1j79u3Z3LlzS8wBERYWxhwcHNiYMWOYg4MDc3Nz4/5OxdfHGGMxMTGsVatWjDHG+vTpw3R0dJizszNzd3dnjDGWmJjIhg4dyjp06MAcHR3Z4sWLuddeunSJOTs7MwcHBzZ+/Hjm7OzMtaWpqSm7evVqmX+fcePGscmTJ7OePXsya2trNnjwYJaTk8MYY2zp0qVs6NChzN/fn1lbWzOZTMZiYmKYr68vs7a2ZsOHD+f+Fu9T9RwBtB/QfqCs/YCyVtg+lDVx9LmV9buamOdz587RsWEt9OjRgx05cqTWy6nu+7omZq2svvP9eb1CQkKYvb099/PIkSOZu7s7c3JyYn5+fiwxMZF7bOvWraxt27bMxcWFffXVV6Kdx4NUT22PkWkgQOTKa4fXr1+zpk2bsgsXLjDGCjvK169fM8ZKdqzJycnca65cucJsbGwYY4xFRUUxW1t
b7kAiNTWV5efns/Xr17PJkyeXWE9ZZs2axZydncv8FxERUeZrALCkpCQWGBjIpk6dyhQKBWOMsYsXLzIrKyvGGGPLly9nP/74I9u+fTtbsmQJY4wxa2trdv78eZaVlcXS09MZY4WTcchkMvb7778zxgoPSuvWrcvu3bvHra9Hjx6sZ8+eLDc3l7179465u7tzE4iUdVD65s2bMifFKa9NyproEQBbvnw59/P7B6UAWGhoKGOMsT/++IPZ2NiUOghmrORBaVmTmPj6+rJz584xxhjLy8tjffr0YQcOHGA5OTnMwsKCnT59mjFWOIkMAO6gdO3atczIyIj17NmTBQUFsaioKG6Z48aNYx07dmTv3r1jCoWCdenShe3bt48xVnhQ+uGHH7I3b96wgoIC1r17d9apUyeWlpbG8vLymLOzMwsODmZlUeVAAO0HtB8ocz+grFHWGBNPn1tRv6upeT516hQdG9YCHwMBmpq1svrO4v1Lfn4+mzRpUomJ8Ypvw8qVK7kBy5iYGNasWTP2/PlzxhhjixYtooEAwhir/TGyVt4+kBTeW7ToNBoA0NHRKXMSnejoaHzzzTd4/fo1d+/WrKwsWFlZQaFQYOLEifD29oZMJoOOjg48PDywYcMGfP755+jevTv69u1b5vpreuqlTCbDwIEDuduoAEDHjh2RlJSEp0+f4ty5c9iyZQv09PTw8ccf4/nz50hISICHhwcUCgUWLFiA8PBwMMaQnJwMR0dH7rS2Ll26lLhFDlB4mYW+vj709fUxevRohIaGlnubHFbOvXSr2iZFJk6cWO5jrVu35m7rNmzYMEyePLnak5q8e/cOZ86cwYsXL7jfZWRk4P79+7h37x709PS40wN9fX1hZWXFPe/zzz/H6NGjcfbsWVy4cAGenp7YsWMHd23u4MGDYWRkBKDw7/Lo0SPutb6+vmjYsCEAwM3NDYaGhjA2NgYAuLq64sGDB9XaDmWg/YD2A2XuB9bW1uWuj7JGWRNTn6upeZ45c2aNXkcKFV0SpE6amrWy+k7g/275HR8fj0aNGpWYOG/fvn347bffkJ2djezsbJiZmQEovJ1gv3790Lx5cwDAp59+ipUrV9aoLkKK08o5AkjVaMpsokKfHbeqbVKdOorXI5FIoKenV2KZ5c2YCvzfwXNERAQ3ccvDhw/xxRdflLuO4po1a4YRI0bgxx9/xBdffIG9e/dyj9WpU4f7f9H9b8t7rKLnCgntB6XRflDxflBTlLXSKGua2+cKMc8dOnRQ2fYS/ggxa3zOfk9IVdFAgJYS22yiQp4dt7w2MTExwdu3byutobjyZiC1srLCkydP8PLlSwAoMZv2+zOS1q9fH97e3iUms3r+/DmePXsGW1tbKBQKbh2hoaElvmE6cuQI8vLyABROrHXr1i20adOmWtsgJLQf0H6grv2AskZZE1Ofq6l5rujsDyJMmpo1vma/J6Q6aCBAS4ltNlEhz45bXpv06tULOTk5kEql+OSTT6q07eXNQNqiRQvMnz8fHTt2hIeHR4nT5t6fkRQA9u7di4cPH8LR0RFOTk4ICAjA69evYWBggD/++AOzZ8+Gk5MT9u3bB2dnZ25Zhw8fhqOjI6RSKZydnWFoaIjly5dXqXYhov2A9gN17QeUNcqamPpcTc4z0SyanDV1z35PSLUpZ6oC4aHJAgtRO9ScsibFIbWj6rsGkIrRflB1lLXaoawJQ1XvGqCJaB8VBvo71B61IWGMJgusVGxsLN8l8Erbt58QQgghpDg6NuIXtb/yUFtqt9r+/UU7EGBmZgYjIyOMHj2a71KIhuJjdlxChIb2A6IulDWianRsSMSCskyKGBkZcXeYqC7RDgRYWloiNjYWr1694rsUXsXGxlInQQghhBCtR8eG1Vd0HLlnzx7Y2dkpdZmk5sSeZVXkTqzMzMy4iTKrS7QDAUDhTlLThiGEEEIIIeJCx4Y1Y2dnBzc3N77LIMVoQ5Ypd6pFdw0ghBBCCCGEEEK
0CA0EEEIIIYQQQgghWkTUlwaQ/0OzihJNpazs0j5AVK0oY5Q1osmK55eyTFSZAcoXKQ9lQz0kjDHGdxFEdeLj42FnZ4fMzEy+SyGkxoyMjBAbG1uja+FoHyDqpKOjg4KCAr7LIKRW6tSpA4lEgqysLL5LIQJQm/fgstD7MqkKZeeOlEYDAVogPj5etLOKVsetW7cwYcIELF68GAEBAXyXU65NmzZh3759OHToEFq0aMF3OYJQmxlRAdoHakKhUKBnz54YM2YMJk2apJRlJiYmwt/fH6tWrULv3r2VskyhSUxMRGpqqkqW3aBBAzRv3lwly+YTZU14im5FJbZ+k7JWM7V9Dy6L2N+XKWu1p4rckZJoIIBohYKCAnTs2BEFBQWIjIyErq4u3yWVKyMjAzY2NujcuTMOHjzIdzlES4WFhaFnz564fv26UmfsdXR0RPv27bFr1y6lLZNoNsoaURdVZc3JyQnu7u6UNcKhfo1oAposkGiFnTt34vr169i0aZOgBwEAoH79+li9ejUOHTqEM2fO8F0O0VJyuRzNmzeHq6urUpfr7++PkJAQOn2ecChrRF1UlTWZTIYTJ05Q1giH+jWiCWgggIheamoqFi1ahNGjR6Nr1658l1MlI0eORNeuXfHZZ58hLy+P73KIFpLL5fDz84NEIlHqcmUyGV6+fInIyEilLpdoLsoaURdVZi05ORnXrl1T6nKJ5qJ+jWgCGgggords2TJkZmbiu+++47uUKpNIJNi0aRNiY2OxZcsWvsshWuZ///sf7t27B5lMpvRld+7cGQ0bNoRcLlf6sonmoawRdaGsEXWhrBFNQQMBRNTu3LmDzZs348svv9S4ifdcXV0xefJkLF26FMnJyXyXQ7SIXC6Hvr4+fHx8lL5sPT099OnThw5iCADKGlEfyhpRF8oa0RQ0EEBEizGGmTNnwsrKCrNmzeK7nBpZsWIFdHR0sHjxYr5LIVokODgYPXr0gLGxsUqWL5PJEBUVhcTERJUsn2gOyhpRF3Vk7fr165Q1Qv0a0Rg0EEBE6/Dhwzhz5gy+//57GBoa8l1OjZiZmeHrr7/Gjh076NpDohYZGRk4d+4c/P39VbaOvn37QiKRICQkRGXrIMJHWSPqoq6s6ejoUNa0HPVrRJPQQAARpczMTMyZMwcymQx+fn58l1MrU6ZMgaOjI2bMmEGzxBKVO3PmDHJzc1VybWMRMzMzeHh4IDg4WGXrIMJHWSPqos6s0Snb2o36NaJJaCCAiNKaNWuQlJSEDRs28F1Krenp6WHTpk2IiIjAnj17+C6HiJxcLke7du3Qtm1bla7H398fp0+fRk5OjkrXQ4SLskbURV1Zk8lklDUtR/0a0SQ0EEBEJy4uDqtWrcKcOXNgbW3NdzlK0aNHDwQGBmL+/PlIS0vjuxwiUowxhISEqPSbjCIymQzv3r3DhQsXVL4uIjyUNaIu6s5aRkYGLl68qPJ1EeGhfo1oGhoIIKIzd+5cNGrUSHQT7K1ZswZpaWn4+uuv+S6FiNTNmzeRkJCgloMYqVQKCwsLOo1WS1HWiLpQ1oi6UNaIpqGBACIqZ86cwaFDh7B69WrUr1+f73KUqmXLlggKCsL333+Pe/fu8V0OESG5XA5jY2N4enqqfF0SiQR+fn50EKOlKGtEXShrRF0oa0TT0EAAEY28vDx89tln6Nq1K0aOHMl3OSoxd+5ctGzZEjNnzgRjjO9yiMgEBwejd+/eMDAwUMv6ZDIZHj58iH///Vct6yPCQVkj6sJH1h48eEBZ00LUrxFNQwMBRDS2bNmC2NhYbNq0CRKJhO9yVKJOnTrYsGEDTp06hePHj/NdDhGRly9f4urVq2o5pbFIr169YGhoSN9oaBnKGlEXyhpRF8oa0UQ0EEBEITk5GUuXLsXkyZPh6urKdzkqNWDAAPj6+mL27NnIzs7muxwiEidPngRjTK2326xXrx68vLzoFkhahrJG1IWvrHl7e9OHMy1D/RrRRDQQQEQ
hKCgIOjo6WLFiBd+lqJxEIsHGjRsRHx+PdevW8V0OEQm5XI727dvD3Nxcrev19/fHhQsX6G4YWoSyRtSFr6zJZDLKmpahfo1oIhoIIBovMjISv/zyC77++muYmZnxXY5a2NraYubMmfj222/x9OlTvsshGk6hUODvv/9W6ymNRWQyGRQKBU6fPq32dRP1o6wRdeE7a3l5eQgNDVX7uon68Z016tdITdFAANFoBQUF+Oyzz+Do6IgpU6bwXY5aLVmyBMbGxpg/fz7fpRANd/nyZaSmpvJyEPPhhx/Czs6OTqPVEpQ1oi6UNaIulDWiqWgggGi0PXv2ICIiAps2bYKenh7f5aiViYkJVq1ahf379+PChQt8l0M0mFwuR7NmzeDu7s7L+mUyGUJCQlBQUMDL+on6UNaIulDWiLpQ1oimooEAorHS0tIwf/58BAYGokePHnyXw4uxY8eiU6dOmDFjBhQKBd/lEA0VHByMfv36QUeHn7cEmUyGFy9eICoqipf1E/WhrBF1EULWkpKSKGtaQAhZo36N1AQNBBCN9fXXXyMtLQ1r1qzhuxTe6OjoYNOmTbh16xZ++uknvsshGiguLg53796Fv78/bzV07doVpqamdGqjyFHWiLpQ1oi6UNaIJqOBAKKR7t27h++//x5BQUFo2bIl3+XwqkOHDpg4cSK++OILvH79mu9yiIaRy+XQ19dH7969eatBX18fffr0oVsgiRxljaiLkLJGH87ETUhZo36NVBcNBBCNwxjDrFmz0LJlS8ydO5fvcgRh5cqVyM/PxxdffMF3KUTDyOVyeHp6wsTEhNc6ZDIZrl27hqSkJF7rIKpDWSPqIqSsRUZG4sWLF7zWQVRHSFmjfo1UFw0EEI1z/Phx/P3339iwYQPq1KnDdzmC0LRpUyxfvhzbtm1DdHQ03+UQDZGZmYmwsDBeZjp+X79+/SCRSHDixAm+SyEqQFkj6kJZI+pCWSOajgYCiEbJzs7G7Nmz4evriwEDBvBdjqBMmzYNdnZ2+Oyzz8AY47scogHOnj2L7OxsQRzENGnSBB07dqTTaEWKskbUhbJG1IWyRjQdDQQQjbJu3TrEx8dj48aNkEgkfJcjKPr6+ti4cSPCw8Px+++/810O0QDBwcFo06YN2rVrx3cpAApPbTx16hRyc3P5LoUoGWWNqIsQs/b3339T1kRIiFmjfo1UBw0EEI3x9OlTfPvtt5g5cyZsbW35LkeQfHx8EBAQgHnz5iEjI4PvcoiAMcYgl8shk8kEM6gmk8mQnp6O8PBwvkshSkRZI+pCWSPqQlkjYkADAURjzJ8/H8bGxliyZAnfpQjaunXr8ObNG3z77bd8l0IELCYmBs+ePeP1lkfvc3V1RYsWLejURpGhrBF1oawRdaGsETGggQCiEc6fP4/9+/fju+++431mVqFr3bo1FixYgHXr1uHhw4d8l0MESi6Xo169eujevTvfpXAkEgn8/PzoFkgiQ1kj6iLkrNGHM3ERctaoXyNVRQMBRPAUCgU+++wzdOrUCWPGjOG7HI0wf/58mJubY/bs2XyXQgRKLpejd+/eMDQ05LuUEmQyGf79918axBIRyhpRFyFn7f79+3j06BHfpRAlEXLWqF8jVUUDAUTwfvrpJ8TExGDTpk3Q0aHIVoWRkRHWrVuH4OBghISE8F0OEZjXr1/jypUrgpjp+H0+Pj4wMDCgb89EgrJG1IWyRtSFskbEgj5VEUF79eoVvvjiC0ycOBEdOnTguxyNMmTIEPTs2ROzZs1CTk4O3+UQAfn7779RUFAAPz8/vksppX79+ujRowcdxIgEZY2oC2WNqAtljYgFDQQQQfvyyy+Rn59PE9/VgEQiwcaNG/G///0PGzdu5LscIiDBwcHcpEJCJJPJcP78ebrzhQhQ1oi6aELWzp07R1kTAU3IGvVrpCpoIIAIVnR0NLZt24bly5ejadOmfJejkRwdHTFt2jR8/fXXeP78Od/lEAFQKBQ4efKkoGY
6fp9MJkNubi5CQ0P5LoXUAmWNqIsmZM3f35+yJgKakDXq10hV0UAAESTGGD777DPY2dlh2rRpfJej0ZYtW4Y6depg4cKFfJdCBCAiIgIpKSmCvLaxSNu2bWFjY0OnNmo4yhpRF03IWps2bShrIqAJWaN+jVQVDQQQQfr9998RHh6OjRs3Ql9fn+9yNFrDhg2xcuVK/Pbbb7h8+TLf5RCeyeVyNGnSRPBzbshkMsjlcjDG+C6F1BBljaiLJmUtJCSEsqbBNClr1K+RytBAABGcjIwMzJs3DwEBAfDx8eG7HFGYMGEC3N3dMWPGDOTn5/NdDuGRXC5Hv379BH8HDplMhsTERERHR/NdCqkhyhpRF03K2vPnz3Hjxg2+SyE1pElZo36NVEbYKSZa6dtvv8WbN2+wbt06vksRDV1dXWzatAlRUVH45Zdf+C6H8CQ+Ph4xMTGCPqWxSLdu3WBsbEynNmooyhpRF8oaURfKGhEbGggggvLw4UOsW7cOCxYsQOvWrfkuR1Q6d+6MsWPHIigoCCkpKXyXQ3ggl8uhq6sLX19fvkuplIGBAXx9fekgRkNR1oi6aGLWgoOD+S6F1IAmZo36NVIRGggggjJ79myYm5tj/vz5fJciSqtWrUJ2djaWLl3KdymEB3K5HJ6enmjQoAHfpVSJTCbDP//8g5cvX/JdCqkmyhpRF03Lmr+/P2VNQ2la1qhfI5WhgQAiGCEhIQgODsa6detgZGTEdzmi1Lx5cyxZsgRbtmxBTEwM3+UQNcrKysLZs2c14pTGIn5+fmCM4cSJE3yXQqqBskbURROz1q9fP8qaBtLErFG/RipDAwFEEHJycjBr1iz07NkTQ4YM4bscUZs5cybatGmDzz77jGaT1SJhYWHIysrSqIOYZs2aoUOHDnRqo4ahrBF1oawRdaGsETGigQAiCBs3bsT//vc/bNy4ERKJhO9yRM3AwAAbN27EuXPncPDgQb7LIWoil8vx4YcfwtbWlu9SqkUmk+Hvv/9GXl4e36WQKqKsEXWhrBF1oawRMaKBAMK758+f4+uvv8a0adPg6OjIdzlaoW/fvujfvz8+//xzZGZm8l0OUTHGGORyOWQymcYNtMlkMrx9+xaXLl3iuxRSBZQ1oi5iyNrly5f5LoVUgRiyRv0aKQsNBBDeLViwAHXq1MHy5cv5LkWrbNiwAS9evMCqVav4LoWo2J07d/DkyRONOqWxiJubG5o1a0anNmoIyhpRFzFkje4eoBnEkDXq10hZaCCA8OrSpUvYs2cPVq5cqTGzsIpFmzZtMHfuXKxevRqPHz/muxyiQnK5HEZGRvDy8uK7lGrT0dGBn58fHcRoCMoaURfKGlEXyhoRKxoIILzJz8/HZ599Bnd3d0yYMIHvcrTSokWLYGZmhs8//5zvUogKyeVy+Pj4oE6dOnyXUiP+/v6IjY2lASsNQFkj6kJZI+pCWSNiRQMBhDe//PILoqKisGnTJujq6vJdjlaqX78+1qxZgyNHjuD06dN8l0NUICUlBZcvX9bIUxqL9O7dG/r6+vSNhsBR1oi6UNaIulDWiJjRQADhRUpKCoKCgjB27Fh07tyZ73K02vDhw+Hp6YmZM2fSrLIi9PfffyM/Px9+fn58l1JjxsbG6N69Ox3ECBxljagLZY2oC2WNiBkNBBBeLF26FNnZ2TRRnQBIJBL897//xf3797F582a+yyFKJpfL4ezsDAsLC75LqRWZTIawsDC8e/eO71JIOShrRF0oa0RdKGtEzGgggKhdTEwMtmzZgiVLlqB58+Z8l0MAuLi4YMqUKVi2bBlevHjBdzlESfLz83HixAmNPqWxiEwmQ05ODs6cOcN3KaQMlDWiLmLM2tmzZ/kuhZRBjFmjfo0URwMBRK0YY/jss8/Qpk0bzJw5k+9ySDFff/019PT0sGjRIr5LIUpy9epVvH79WhQHMe3atUPbtm3p1EaBoqwRdRFj1ug2gsIkxqxRv0aKo4EAonL5+flYtmw
Z0tPTcfDgQZw7dw4bN26EgYEB36WRYho3bowVK1Zg586d+Oeff/D06VOsX7+e77JILcjlcjRu3BidOnXiuxSl8Pf3R0hICJKSktCuXTs8e/aM75LI/0dZI+oi1qwxxvguhbxHrFmjfo0UoYEAonIPHz7E8uXLERERgc8//xz9+/dH3759+S6LlGHy5MlwdnbGjBkzcPbsWXz++efIycnhuyxSQ3K5HP369dP4u3LExsZi2rRp6NmzJ549e4b9+/fjwYMH0NGhtzChoKwRdRFL1orIZDI8e/YMt27d4rsU8h6xZI36NVIeSgBRuTdv3gAADh06hBcvXuCrr77C+fPnea6KvI8xhjNnzmDdunX4559/EBUVBaDwDg9E8zx79gw3b94UxSmNOjo6+PXXX7F582bUq1cPwcHBaNy4Mc0xIhCUNaIuYspake7du6N+/fp0yrbAiClr1K+R8tBAAFG5og+SO3fuxKBBg9C/f3989NFHdKs6gUlOTkb//v3x2WefoXfv3ti9ezcAGgjQVCEhIdDV1UWfPn34LqXWbGxscPToUZw7dw4NGjRAdHQ0pFIpJBIJ36URUNaI+ogpa0UMDAzQu3dvGggQGDFljfo1Uh4aCCAqV/RBUkdHBwcOHICDgwOuXbsGfX19nisjxTVr1gyRkZEwMjJCaGgo0tPTAdBAgKaSy+Xo0qULGjZsyHcpSuHj44Pff/8dz58/x5s3b2Btbc13SeT/o6wRdRFb1orIZDJERETg1atXfJdC/j+xZY36NVIWGgggKhcZGQkA0NXVxa5du3DixAm0atWK56pIWaRSKa5cuYLVq1dzI8V3797luSpSVXl5eUhLS0N2djZCQ0NFcUpjcQEBAVi3bh0AoFGjRjxXo90oa0RdxJ41APDz80NBQQFOnjyJ/Px8pKam8l2SVhJ71qhfI++jgQCict7e3ujUqRMePHiAcePG0alIAqenp4e5c+fi5s2bcHZ2RocOHfguiVTR2rVr0bt3b5w7dw6ZmZnw9/fnuySlmz17NkJDQ7Fs2TK+S9FqlDWiLtqQtebNm8Pd3R1yuRzbtm0TzSz1mkYbskb9GimOBgKIyg0cOBARERE0KYmGsbe3x40bN+Ds7Mx3KaSKGjdujOvXr+PYsWNo1aoV7t69i86dOyM/P5/v0pSqV69eMDQ05LsMrUZZI+oi9qytWLECEydORL9+/XDy5EmEh4fDzMyM77K0ktizVoT6NVKEBgIIIUQkpFIp8vPz8ddff8HJyQmjRo2CtbW1xt/6iAgPZY2oi9iz1qFDB+zevRvR0dFITU3F1atXIZVK+S5LK4k9a4S8T8IYY3wXwZf4+HiamIWUKTExkbtGr0GDBnQ2A1GZ4lmrrezsbPznP/8BUHiJh6urKzZv3gw9PT2lLJ9oNsoa0USZmZnw9PQEUDjDfrdu3XDy5ElRTTi8d+9ejB49GnXr1kVOTg42b96MTz/9lO+ytE5GRgaMjY0BAIaGhvDx8cHGjRvx9u1bnisjfDMzM4OlpSXfZSid1g4ExMfHw87ODpmZmXyXQgRIR0cHBQUFfJdBtABljagLZY2IQd26dXHv3j3RHZRv3rwZM2bMAACEh4eja9euPFeknczMzPD69Wt069YNO3bsgKurK31WIDAyMkJsbKzo+h2tHbp/9eoVMjMzsWfPHtjZ2fFdDhGQ2NhYjB49mrJBVI6yRtSFskY0WWBgIF6+fIk1a9Zg8uTJePXqlegOyKdPn45r167ht99+g5OTE9/laK0WLVoAAIKDg/Ho0SP6rEC4908x9jtaOxBQxM7ODm5ubnyXQQSIskHUhbJG1IWyRjTRvXv3wBjDrVu3+C5FpXbt2oWffvoJBgYGfJeitaKjo8EYK3GZE/WbRKy0fiCAEEIIIYQIlzZN1kaDAPzSpqwRQncNIIQQQgghhBBCtAidEUAIURq6E0f1xMbG8l2C1tD2bFLWhEnbc1ldRTm+ePGiqDKtzrsTKfPuIXxTdbu
JKWOElIUGAgROIpEgJSUFDRo04GX9nTp1Qk5ODgBAoVDgzp07uHnzJqRSKYKCgnD48GEYGhpCX18f33zzDfr06VPmch48eIBx48bh1atXMDU1xa5du+Dg4KDOTSGVqG3W6E4c2ikuLg4uLi6COLCUy+VYsmQJbt++jU8//RTff/89AMomEZ5z586hb9++yM/Ph0Kh4LscjTNnzhy6A0YN0d1DNIeQ3l8B4NChQ1i2bBmKbjgXHByM1q1b44cffsDWrVuhq6sLhUKByZMn47PPPivx2uTkZDg5OaFz5844evQoACArKwtTpkxBVFQUAMDKygo7duxAkyZNUFBQgLlz5+LkyZPQ09ND48aN8fPPP6Nt27al6iooKMDMmTMREhICiUSCWbNmYfr06aptDJGggQCRS01NhbGxcY2vebp69Sr3/4MHD2L58uWQSqUAAE9PT3z55ZeoW7cubt68ie7du+P58+eoV69eqeVMmTIFkydPxvjx43Hw4EGMHz8ekZGRNdsoIkiPHz+m2XWrqWgmWlIoLy8PWVlZMDExqdHrra2t8csvv+DPP/9ERkYG93u6S4z4sqZQKEpM5sWH169fo3HjxjV+vaWlJR48eKDVuawJugNGzVHbVY+Y+s3avr9GR0dj8eLFOHv2LFq0aIH09HTus8Xo0aMxbdo0AEBaWhocHR3h6ekJV1dX7vVTpkyBv78/Xr9+zf1u27ZtyMzMRExMDCQSCSZNmoQ1a9Zg9erV+Ouvv3Dp0iXcvHkT+vr6WLFiBYKCgnDgwIFSte3Zswd3797Fv//+i7dv38LV1RXe3t70hWMV0ECAhmCMYeHChYiNjcX+/fthZGRU7nOzsrIQHByMffv2ITo6Gnfu3Cnzw3l17dixAx9//DH3c79+/bj/Ozk5gTGGly9fllpXcnIyrl27hlOnTgEAhgwZgunTp+Phw4dljuwRftU0axEREQBodl1Nd+XKFcybNw/p6elgjOHrr7/GwIEDce3aNXz22WfIyMhAnTp1sGHDhlL3uc7KysL48eMRExMDfX19NGvWjNvvy8MYw4ULF7Bv3z6cOHECe/fuhaenZ41qb9euHQDgyJEjZT5O2dRsEokES5YsQUhICLy8vDBv3jx88sknePDgARhjmDFjBqZMmYJTp05h7dq1OHXqFNLS0tC4cWP88MMPmDx5Mnbv3o1z587hl19+wYoVK7B3714YGhoCAI4dO4ZWrVpVWMO9e/ewb98+HDhwAB9//DHmzZtX6+2iXNYMtVvNUdvxQ5PfX9etW4c5c+Zwt1c0NjbmHjM1NeX+/+7dO+Tl5ZV47Y4dO/Dhhx9CKpVyZwMAhX16ZmYm8vLyoKOjg4yMDO7WmRKJBDk5OcjOzoaenh7S0tJgYWFRZm1//PEHJk2aBF1dXTRq1AiBgYH4/fffsWLFihptqzahgQANkJOTgxEjRqBx48Y4cuRImd/u5+fnIzQ0FPv27cOFCxfQu3dvzJgxA15eXtDRKZwTcs2aNdi7d2+Z61i6dCkGDx5cbg1Pnz7F+fPn8dtvv5X5+M6dO2FlZVXmQdTTp0/RvHlz7tsbiUQCS0tLxMfH00CAwNQma4sXL0aHDh14qJooy5s3bzBo0CAcPHgQnp6eKCgoQGpqKnJzcxEQEICff/4Zffr0QXh4OIYMGYKHDx+WeP3JkyeRmpqKu3fvcssrT3R0NPbt24cjR47A0dERw4cPx4YNG7iBp7CwMMyePbvM18pkMnzzzTdK2mqiSXR1dbmzyQIDA2FjY4PDhw8jOTkZ7u7ucHZ2hqenJ4YPH46cnByEhYWhQ4cOCA0NxeTJk3H69Gn069cPKSkpWLt2LRITE1G3bl1kZmZy75Xve/bsGfbv34/9+/fDwMAAw4cPx7lz52Bubg4ASE9PL/fgulmzZvj777/LXS4hRDto+vvr3bt30bp1a/To0QNpaWnw9/fHsmXLuOPEgwcPYunSpXj48CG+/fZb7myAx48fY+vWrbhw4QL
++OOPEsucMmUKLl++jKZNm0JXVxedOnXiTunv378/wsLCYG5uDmNjY3zwwQc4f/58mTXHx8eX+PzRunVr7sspUjEaCNAAMpkMAwcOxJdfflnuc9zd3ZGUlISNGzdi+/bt0NfXL/WcefPm1fjbi127dsHf3x9mZmalHjtz5gyWL1+O06dPQyKR1Gj5RBhqk7Wia7yI5rpy5QpsbGy4DzU6Ojpo1KgRYmJioKOjw80B0q1bNzRr1gw3btwoMULv7OyM2NhYTJ06FT169ICfn1+Z65k1axZ+/PFHLFmyBNevXy/xbUIRb29v3LhxQ/kbSTTaxIkTuf+Hhobi+vXrAICmTZsiICAAoaGh8PDwgIuLCy5duoTQ0FAsXLiQu6b87NmzWLNmDUxMTGBtbY3Ro0fD19cXMpmszG+bDh8+jI8++ggjRozAoUOHyhzsNjY2rnZW3dzccOLECXh5eVXrdYQQzaTp768KhQLR0dE4efIkCgoKMGDAAPz444/cB/ePPvoIH330EeLi4jB48GD4+/ujXbt2mDhxIjZv3oy6deuWWuapU6dQUFCApKQk6OjoYPz48ViyZAlWrFiBa9eu4fbt20hISICJiQkWLlyITz75BHv27KlW3aRidPtADdCzZ0+cPn0aaWlp5T5n+/btGD58OIKCgjBq1CgcOXKEm+SvyJo1a+Di4lLmv/JOpQUKTy3auXNnicsCipw/fx4TJkzA8ePHYWNjU+brW7ZsicTERG5CJMYY4uPjYWlpWZXNJ2qkrKwR8Str0M/Kygp3795F3759cenSJTg6OiIlJaXU8+bMmYOlS5fiwIEDGDhwILZt21Zq5vSwsLBy+6vFixerbLuIsNWvX7/cx4pn0sfHB6Ghobhw4QJ69eoFJycn7NmzBw0bNoS5uTl0dXURERGBWbNmITk5GR4eHrh48WKpZfbu3Rs///wzkpKS4O/vj6+++gr//vtvieekp6eXm9XyJtA1MTEpcWotIYQUEeL7q6WlJYYMGYK6deuiXr16CAgIKPNb99atW6NTp04IDg5GWloabt26hcDAQLRu3Rpz587FqVOn0KtXLwDATz/9hMGDB6NOnTowMDDAqFGjEBYWBgDYvXs3evbsiQYNGkBHRwfjxo3jHiurtidPnnA/x8XF0WeMqmJa6vr16wwAu379Ot+lVAgAS0lJYRs2bGAdOnRgr169qvD5+fn57PTp02zChAnM0tKSjR07lmVnZ9eqhtDQUNayZUuWn59f4vfnz59nLVu2ZFFRUZUuo0ePHmznzp2MMcb+/PNP5u7uXquaVElTsqFstc2aTCbTynZjjLF+/fqxe/fuVfq8L7/8ku3Zs4f7WWhZe/PmDTM3N2cXLlxgjBX+jV+/fs1ycnJYy5Yt2alTpxhjjF26dIk1a9aMpaens8ePHzNTU1PGGGNPnz5lGRkZjDHGvebmzZsVrvPmzZtswYIFrG3btqxv375VasfKLF26lM2cOZP7WWjtXBtiyVpNFPVRRYYNG8aCgoIYY4wlJyezli1bsoiICMYYY1evXmWtWrVi3bt3Z4wxtnnzZtaqVSsuF2lpaSwhIYFbVmBgINu4cWOF63/+/Dlbv34969ChA3N3d2fHjx+v8bY8f/6cRUZGCvpvUtOs1VZERASTSqXM2tqaeXt7s2fPnpV4XOhZFmq7MSbcthNqmymzvTT9/XXv3r0sMDCQ5efns7y8PDZgwAC2evVqxhhjd+7c4Z6XnJzMrK2tue0pbufOnWzgwIHczzNmzGDjxo1jBQUFrKCggH366ads8uTJjDHG1q1bx3r16sVycnIYY4ytWrWK+fr6llnbzp07Wc+ePZlCoWCvX79mlpaW7NatWzXe1vcJdb9RBhoIEPgftfiBz08//cSkUilLTEys0muzs7PZoUOHWFZWVq1qGDFiBFuyZEmp37dt25Y1bdqUOTs7c/+Kdrxjx46xjz/+mHvuvXv3mIeHB7O2tmbu7u5K3UGVTVOyoWy1zdq
aNWu0st1qQ4hZu3LlCuvSpQtzcnJizs7O7K+//mKMMRYZGck6d+7MnJycWIcOHdjFixcZY6zEgUpISAhzdnZmUqmU2dvbcx/SqqKgoICFh4ezBw8e1Lj20NBQ9sEHHzBjY2NWv3599sEHH7Bjx44Jsp3VTQxt8P5AQFJSEhs8eDBzdHRkDg4ObOvWrdxj+fn5rEGDBmz58uWMMcbu37/PAHB5fvr0KevUqRNzdHRkTk5OLCAggKWmpla5ln///ZfbB2pi06ZN7MMPP9T4v4my5efnszZt2rCzZ88yxhhbs2YN++ijj0o8RwxZVraqtBtj1HbF8ZE1TX5/zc/PZ59//jmztbVlDg4O7JNPPuE+pE+ePJnZ2dlx9f3www9lLuP9gYDXr1+zIUOGMHt7e2Zvb88CAgLYy5cvGWOFx5X/+c9/mK2tLXNycmK9e/dmjx494l7r7OzMDeYqFAo2depU9uGHHzIrKyv2/fff13g7yyLm/YYGAkT4RyW1Q9moGbG329GjR5mtrS2TSqVs/vz5rHHjxuzx48eMMcZatWrFoqOjGWOFZ798/vnnrFu3bszKyopNmTKFW8a4cePYhg0buJ/F3mZCoWntTFnTDkL4m6gia7Xxzz//MBsbG+7ntLQ0ZmhoWOILDWq30qrSbozx23aa2GZCyBrhn5hzQJMFEkJIJZKTkzFx4kRcunQJtra22LlzZ4l74b7v0aNHCAsLQ15eHuzt7XHlyhV07txZjRUTTUVZI+qijqwFBgbi/v37ZT52/PhxtGzZssTv3p/929jYGCYmJnj+/DmsrKyqsXWqQ+1WfdRmhAgTDQQQQkglIiIiIJVKYWtrCwAYN24cPvnkk3KfHxgYCD09Pejp6cHFxQWPHj2iD2ekSihrRF3UkbX3bxcmBtRu1UdtRogw0V0DNISfn1+5I53FLVmyBHv37lXaeq9evQpnZ2e0a9cOPXv2REJCQpnP27VrF0xNTblZR729vUs9JysrC/b29nBxceF+V1BQgDlz5sDe3h5SqRTe3t6l7p1K1IuvrIlJnTp1uP/r6upyd8wgJVHWao+yVjVCfw/VBDXJWmBgYLmzkz99+rTU89+f/Ts9PR1v375FixYtlLMRPKB2qz5qs8ppSp9W1rF/EcYYd2eAIjExMejevTtsbW3h6OiIiRMnIisrS2n1k5LojAANERISUqXnffXVV0pbZ0FBAUaNGoWff/4Z3t7eWLt2LWbNmoU///yzzOd7e3vj6NGj5S5vwYIF6Nq1KyIjI7nf/fXXX7h06RJu3rwJfX19rFixAkFBQThw4IDStoNUDx9ZEzoPDw/cunUL9+/fh42NDfbs2YPc3Fy+y9J4lLXSKGuqoQnvoeqmjqxV91tad3d35OXlISwsDN7e3ti2bRv69+9f4oMh36jdqo/aTPk0pU8r69i/yIYNG9CmTRtERUVxv6tTpw42b94MqVSK/Px8jBw5Et999x2WLVumtO0g/4fOCBCQY8eOwc7ODs7OzliwYAHMzMwQFxcHoPC+nDdu3AAAeHl5Ye7cufD09ESbNm1KnF41fvx4fP/990qp5/r169DT0+O+3Z8yZQqOHz+O7Ozsai8rNDQUCQkJGDVqVInfSyQS5OTkIDs7G4wxpKWlwcLCQin1k/IJLWtC17RpU2zfvh2DBg2Ci4sLYmJiUL9+/RKj2KRslLXqoazVnNCypsz3UFUQYtZ0dHSwZ88ezJw5E+3atUNwcDA2bNjAPe7n54e7d+/yVh+gue127do13urT1DbjO2ua3qeVd+wPAHfu3MHRo0excOHCEr+3traGVCoFUHg2SIcOHbhtJspHZwQIhBgmUgkPD4eLiwuMjIwwe/ZsDB06FACQmpqK+fPn4+TJk6U61f79+yMsLAzm5uYwNjbGBx98gPPnz1e4HaR2aDKymvHx8cHgwYMBAEePHkVwcDB3EFP8TercuXMlXnfw4EHu/7t27VJxlcIixH5NE1DWqk+IWdOEyciEmLX
OnTvj1q1bZT4WEhJS4ttDvmhiuwHgte00sc34bC9N79MqOvbPy8vDpEmTsGPHDujq6pZb37t377B9+3asXLmywu0gNUcDAQKh6ROp+Pv7Y9iwYTAyMkJsbCx8fX3RsmVLeHh4YPr06QgKCkLTpk1LdQbXrl3D7du3kZCQABMTEyxcuBCffPIJ9uzZo7JatR1NRlYzmzZtwh9//IH8/HyYmJjQNetVoOn9Gl8oa9VHWasZylrNULtVH7VZ9Wh6n1bRsf/y5csREBAAOzu7cr/tz83NRWBgIHx9fbkBJKJ8NBCgoWo6kUp1Rv6qM5GKmZkZ9387Ozv4+fnh0qVL8PDwQHh4OMLDwzF37lxkZ2fjzZs3sLGxwf3797F79+4SE4WMGzcOvr6+lW4LUR+ajKxQUFAQgoKC+C5D1NTRr2kCyprqCe09lC+UtZqhdqs+ajPVElqfVtGx//nz5xEfH4/NmzdDoVAgLS0NrVu3RmRkJJo0aYK8vDwEBgaiefPm2LhxY1WbgNQADQQIhKZPpJKQkIAPPvgAAPDixQucPXsWgYGBAEqf8jVr1izuuiYrKyuEhIRg7ty5MDAwQHBwMBwdHWu2gaRKaDIy/i1btgypqakYO3Ys36WolBD7NSJOQsyapk9GVhtFfZy2zO2hLNRuNSPGdtP0Pq2iY/+LFy+WeJ6Liwv3fIVCgeHDh6NRo0b46aefIJFIqr1dpOpoIEAgik+kYmhoiN69ewtmIpUpU6YgOzsbLVq0wG+//cY97ufnh6+++grt27fHDz/8gGPHjkFfXx8FBQWYPXs2evbsWek6pk2bhtjYWDg7O0NfXx/m5ubYunWrKjdL6wkxa0ScKGtEXYSYtcreQ2fMmMFbbYQQYdPEPq3454Ka+uOPP3D48GFIpVK4uroCALp27Yoffvih1vWTMjAtdf36dQaAXb9+ne9SOGlpadz/jxw5wmxtbXmsRnsJMRvKpoqsaVq7ZWZmsmHDhjE7OzsmlUpZ7969GWOMJSYmMi8vL+bm5sbs7e3ZtGnTWH5+PmOMsZ07d7JevXqx4cOHMzs7O9a5c2d2584dNmjQIGZra8t69+7N0tPTGWOMLV26lAUEBDBvb29mY2PD/P392atXr7jHZs6cqXFtVhNC6NeE1s58ZO/MmTOCagNVEELWqkPVuRRCH1dkzZo1rEOHDszV1ZX16dOHxcXFMcYY++uvv5iTkxNzdnZmDg4O7OjRo5VuF7VbzdqNMdW2nRjbje/3Dk3r08SK7xyoEp0RICA0kQpRF8oacPLkSaSmpnKT2Lx58wYA0KBBAxw/fhz169dHfn4+Bg4ciAMHDmD48OEAgMjISMTExMDS0hJjxoxB//79cfnyZTRr1gz+/v749ddfMW3aNACFp7/dunUL5ubmmDp1KhYtWoSffvqJnw3mCWWtND6yt3nzZn42Vo0oayUJpY/bt28f7t+/jytXrkBXVxe//fYbpk6dCrlcji+++ALbtm1D586dUVBQgLS0tFLbkZ6eDk9PT+7nzMxMlbRXEbG2G6DathNju6k6a5WhPo2oGg0ECAhNpELUhbIGODs7IzY2FlOnTkWPHj3g5+cHACgoKMCCBQsQHh4OxhiSk5Ph6OjIHbR07twZlpaWAID27dsjLy8PzZo1AwB06NABDx484NYhk8lgbm4OAJg8eTICAgLUuYmCQFkrjY/syWQydW4iLyhrJQmljzt69CgiIyPh7u4OAMjPz+ce69WrF2bOnImPPvoIvr6+cHFxKfV6Y2Nj7tpioPAWeEXLUgWxthug2rYTY7upOmuVoT6NqBoNBBBCtJKVlRXu3r2Ls2fPIjQ0FPPnz8eNGzfwww8/IDk5GVevXkWdOnUwZ84cZGdnc697f2be6szUS5PeEICyR9RDKDljjGHRokWYPHlyqcfWr1+PO3fuICwsDOPGjcOoUaMwf/78Es9R9xkBYm03QLVtJ8Z24/uMAEJUTYfvAojqLVu2DLNmzeJt/QUFBZg
7dy4cHR1ha2uLjz/+uMTMp9999x3s7e3h4uICDw8P/PPPP9xjH330EVq0aAGJRILU1FQeqidi9ezZM0gkEgwYMABr164FYwxPnz5FSkoKzM3NUadOHSQlJeHPP/+s8TpCQkLw4sULAMD27dvh4+OjrPK1Ht/9Wm3wkb1OnTopq3yto6lZE0ofN2jQIGzdupU7VTwvLw/R0dEAgHv37sHBwQHTp0/Hp59+ioiIiFKvL/qGtujf/v37a1xvVYi13VTddmJsN1VnjU9C6NdiYmLg5eUFOzs72NnZ4fDhwwCAK1euwMXFBS4uLnBwcMCUKVOQk5MDADh79iw6duwIe3t7ODg4YP78+SgoKOBzMzQanRFAVG7Hjh2IiopCVFQU9PX1MXnyZGzcuBHz5s3DjRs3sGXLFty5cwf169fHnj17MH36dG4w4JNPPsGWLVu408QIUZaYmBgsWrQIjDEoFAqMGTMGUqmUO23QwcEBLVq0qNWHd09PT4wcORIJCQmwtrbGrl27lLcBRGPxkb3Zs2dzB1lEOwiljxs1ahRev34Nb29vAIW3B5s4cSJcXV0RFBSE+/fvw8DAAEZGRvjxxx9rXIuyULvVDLUbqY7MzEwMHDgQu3fvRrdu3ZCfn88N3jg7OyMyMpK7E9mQIUOwZcsWzJ49Gw0bNsT+/fthZWWF7Oxs+Pj4YPfu3Rg/fjy/G6Sp1D07oVCoewZIMc6mWlXTpk1j33zzDffzoUOHmJOTE2OMsRs3brDmzZuzpKQkxhhjmzZtYoMHDy61DAAsJSVFKfVURtNnB+Ura5rebsr2/n5XFk1vM03p19q2bavR7VxdZWWPsia891BN/5tUpY9TBWq3mtPktuOj3egOFar7bPDzzz+zESNGVPq8rKws1qdPH7Zhw4YyH582bRpbunSpUmoqjybvN5WhSwPUpPhsqjdv3uRONyqaTfX69eu4desW4uLicODAAe51kZGR+O6773D37l20adMG/fv3x9atWxEbGwsDAwP8+uuv3HMvXryIffv24d69e2jZsiUWLVpUqo7is6lGRUVh1KhRmDp1KgBws6neuHEDt27dQo8ePUq9Pj09nTtd5/1/ffr0KXPb3d3d8ddffyEtLQ15eXk4cOAA4uLiABSO+s2ePRsffvghLCwssGHDBmzatKnG7UyEkzUifkLJWmX92uLFi1XcEkTVNCVr1XkPLZosjRCincTYr1X1s8Hdu3dhaGgIf39/uLi4YOzYsXj58iX3eFxcHJydnWFmZgZTU1OunuKSkpJw8OBB+Pv7V7HFyfvo0gA1EeNsqlU1fvx4PHnyBD169EDdunXh4+ODU6dOAQAeP36Mw4cP4+HDh2jRogU2b96MwMBAhIeHV2sd5P/wlbVPPvlEnZspeMuWLeO7BJXTlH5tzZo1Kth64RJj9jQla5o0I3ltiTFn6kDtVjNibDcx9mtVpVAoEBoaioiICLRo0QJBQUH49NNPcfDgQQBA69atcfPmTWRkZGD06NE4fPhwicHTtLQ09O/fH/Pnz0f79u2rtW7yf+iMADUpmk21b9++uHTpEhwdHZGSkoL169dzs6neunULI0eOVMtsqkUTocTExCAmJgZA4WyqO3fuhJGREcaNG4fVq1eXen1NRv0kEgmWLVuG6OhoXL58mZvgAwAOHToEJycntGjRAgAwYcIEXLp0qcRkgqR6hJI1In5CyVpl/ZoYDyC1jaZkrTrvoXRGACHaTYz9WlU/G1haWsLb2xsffPABJBIJRo8eXebkjfXr18fw4cOxd+/eEuvr27cvBg4ciDlz5pS7raRyNBCgJmKcTfX9f3///XeZdWVnZyMlJQUA8OrVK6xatYq7XYuVlRUuXbqEjIwMAEBwcDDatWsHAwODGreDthNK1oj4CSVrlfVrbdq0qfH6iTBoStZoRnJCSFWJsV+r6meDYcOGITIyEmlpaVydzs7OAICHDx8iLy8PAJCbm4sjR45AKpUCADIyMtC3b1/07ds
XX3zxRY3bhRSiSwPURJtnU3379i28vLygo6ODgoICzJw5E/379wcADB48GJGRkWjfvj0MDQ1Rr1497Nu3j3utTCbDzZs3AQAODg6wtrbGuXPnlFKXWPGVtSdPnihxK5Rj2bJlSE1Nxffff89rHRKJBI6Ojvjuu++4U//Onz+PTz/9lNe6aktT+rXqnrKoTkLJqNBpStb4npFcKHl6v8+LjIzEzJkzcePGDfj6+uLo0aMlnr9ixQrs3LkTADB8+HB88803aqtVE9vsm2++KfHh8H//+x/+85//YP369RWu4/nz5yqpXahtuH//fqxatYr7hnzChAn4/PPPAQDnzp1Dv379YGNjw73+ypUrqFu3rtrq1eZ+zdLSEkFBQejSpQt0dHTwwQcf4KeffgJQeIvA//73v9zZDb169cKXX34JANi4cSP++ecfvHv3jrsTztChQ2kuoJpS8+SEgiG2GSD5nIVWbMSWDWUrL2tCbDeh7Bd4764XCQkJrFWrVuzgwYOCazMhUdbfT4jZLKKujAq5DYRALDOSC7XPe/r0Kbt69SrbunUrGzhwYInnnj9/ntnb27OMjAyWnZ3N3N3dWXBwcLnLVna7aWKbFZednc0aNWrErl27Vuk6zp49q5J+QKhtGB4ezhITExljjKWmprI2bdqwsLAwxhhjYWFhzNnZucLlaXq/KZS/i6bT9BxUhC4NIIQI3jfffIPp06dzP2dkZKBRo0Z4+fIlYmJi0K1bN7i5ucHe3h4rVqwocxm7du3CoEGDuJ+Dg4Ph5eXF/fzbb7+hU6dOcHNzQ/fu3bkzUVRhy5YtGDlyJD788EOVrYOol9gySvgltjxZWFigY8eOMDQ0LPXYH3/8gTFjxqBevXowNDTExIkT8fvvv1d7HdrUZsUdPXoULVu2rNLEk6amphU+LrY27Nq1KzdRnqmpKWxtbbm7VhFC6NIA0aDJsIi68JG1sWPHwt3dHevWrYOhoSH+/PNPeHt7o0mTJqhTpw7OnDkDQ0NDZGVloUuXLvDx8YGHh0eVl3/p0iX8/vvvuHDhAgwNDXHx4kWMHDkSd+7cKfXc2bNnIywsrMzlbNu2DZ06dap0fXfv3kWrVq3oTguV0KR+TegZzczMrPU2ipnQsib0PBWpap9Xkfj4eHTr1o37uXXr1jWaP0Gb2qy4HTt24OOPP1bKssTchnfv3sWVK1ewdetW7nePHj2Cm5sbdHV1MWHChDJvUafJhNavEeGhgQBCiOC1bNkSrq6u+OuvvzB06FDs2rUL8+bNAwBkZWVh6tSpuHHjBnR0dPD06VPcuHGjWgcnx44dw82bN0scWLx58wZZWVmlrhfcsGFDrbdHoVDgwoULWLduHXdNHtFsQs+opt+qTtsIPU9CpI1t9uTJE4SHhytt4kmxtuGzZ88wcOBAbN26FRYWFgAANzc3PHv2DKampnj27Bn8/PxgZmaGYcOGKW29hAgdDQQQQjTCxIkTsXPnTri7u+Phw4fo27cvACAoKAhmZmaIjo6Gnp4eAgICStxmp4ienl6Je+MWfw5jDOPGjcO3335baR3K+JbC0tISLi4uMDExqfS5RHMIOaN0RoDmEXKeiijj221LS8sSk83GxcVx90ivLm1psyI7d+7EwIED0ahRI6UsDxBfGz5//hw+Pj744osvMHToUO73xd9/LSwsMGLECFy8eJEGAohWoTkClGjZsmWYNWsW32VAIpHAyckJISEhAAqv1zI1NeXu6VnWN5BZWVmwt7eHi4tLtdbFGEPPnj3RoEGDEr9fs2YNHB0dYW9vj8GDByM1NRVA4Qypxe8v2rp162q/ge3cuRMSiaTEDLoTJkyAVCqFi4sLOnTogDNnznCPJScno2/fvrC2toajoyMuXLjAPTZq1CiYm5sL4u9WU0LJnaq/bRw0aBAiIyOxcuVKjB49Gnp6heOYKSkpsLCwgJ6eHu7fv4/Tp0+X+fq2bdvi1q1byMrKgkKhKHF3igEDBmDPnj2Ij48
HABQUFODatWtlLmfDhg3l3ianqgcmI0eORFhYGHJzc6vTBLwQSr7e79ciIyPRpUsXGBkZlbgetbLHVEnIGdWEW9VpYtaAwjuAdOjQAQ4ODrC3t8eVK1cAAO/evcOECRPg5OQEW1tbLFy4EIyxKtch5DxVt8+ryNChQ/Hbb7/h3bt3yMnJwS+//ILhw4cDAP755x/06tWrysvSljYrWv/OnTtLXRaQkJAAW1vbGi9XTG2YmJiIXr16YcGCBRg3blypxwoKCgAU3pc+ODgYrq6uAGrfhuURah+3f/9+uLi4wNHREY6Ojli3bl2J5+/YsQPW1tZo06YNJk2axN3ar6CgAHPmzIG9vT2kUim8vb3x8OHDatUyfvx4SCQS7nPC8+fP0adPH9jY2EAqlWLIkCF4+fJlqdeV9VmgImL/LFBTdEaASF28eLHEh3Nvb+8Kd5YFCxaga9euiIyMrNZ6NmzYgDZt2iAqKor73enTp7Fz505cvXoVxsbGWLFiBRYvXowffvgBTk5OJW7lNX36dEgkkiqvLy4uDj///HOpU9E2bNjAbW90dDR69eqFV69eQUdHBwsXLoSHhwdOnjyJyMhIDB48GI8fP4a+vj727t3L3faGCJuhoSGGDRuGLVu2IDY2lvv9F198gTFjxuDXX39FmzZt0LNnzzJf7+HhAT8/Pzg6OqJ58+bo2rUrrl69CqDw9jqrV6/G4MGDoVAokJubC5lMhvbt26tkW7p06YIBAwZgxIgRKlm+WBXv15o3b47vv/8e0dHROHHiRInnVfSYKokpo9quqll7/vw5xo0bhxMnTsDOzg45OTnIysoCAHz77bfIz8/HrVu3oFAoMGDAABw8eLDEt5IVEVOe7t+/j169eiEzMxNZWVmwsLBAUFAQpk6dCi8vLwQGBsLJyQkAEBgYCH9/fwCF7/nVuZ2btrQZAISGhkJHR6fUQElCQgL34f19RdmsiJjacMmSJYiPj8fGjRuxceNGAMDMmTMxYcIEHDp0CD/++CP09PSgUCgwdOhQTJgwAUDFbSgWxfu4li1b4uTJkzA3N8fbt2/h7u4Od3d3eHl54fHjx/jyyy8RFRWFZs2aYeDAgfjpp58wbdo0/PXXX7h06RJu3rwJfX19rFixAkFBQThw4ECVajh8+DD09fVL/E5XVxdffvklN2/IvHnzMG/evBK3PSzvs0BF6LNAOXi8YwGvKroVxIoVK9i0adO4n9PT01nDhg1ZcnIyu3XrFuvatStzdXVldnZ27Ouvv+aeV/w2HTt37ixxu5fjx4+zHj16cD/v3r2bdezYkbm6ujJPT09248YNpW0b3rt9yvu1vO/06dMsICCgSrdSKe727dvM09OTPXz4kJmamnK/X7NmDZs0aRL38/Xr15mxsXGp12dlZbEGDRqw6OjoKq0vPz+f9erVi127do316NGDHTlypMznhYWFsYYNG7L8/HzGGGP16tXjbh/DGGMdOnRgp0+f5n5+//YqfN0mRAy546PdNMH7+2QRdWZNDPkqqw0r6t+KHhPzrX+qirJWddXJ2uLFi9miRYvKXI5MJmN79+7lfl63bh3r378/97OYc1leG1bX1KlT2cWLF0v8Tqztpqw2W716Nfvtt9/KfEysbVdE2W1YXnuJtY8rIpPJ2M6dOxljhW0xZcoU7jG5XM66du3KGGPs6NGjzNnZmaWlpbGCggI2b948Nnv27CrVkJSUxNzd3VlaWlqF9fz5558l2qWqnwXeV93PAsWJeb8R93BXDYlx1tTw8HC4uLjAyMgIs2fP5r6RSE1Nxfz583Hy5EncvXu3ytuQl5eHSZMmYceOHdDV1S3xmLu7O7Zs2YKkpCQ0a9YMe/fuRXp6Ot68eVPiMoDDhw/DysqqypcjrF+/Hl27di33FPSFCxfizz//REpKCg4dOgQdHR28fv0aeXl53O1jgMIZiYtOSxMSMeaOFGrWrBl69OiBlStXws/Pj5caKF9EXbQpa0V3APHx8cGrV6/
g6emJVatWoV69enB3d8eff/6JIUOGIC8vD0ePHtWab5yU1ef98MMPSqxK2JTVZkWT+2kjZbdh8bNdixNzH/f+3RXi4+PRqlUr7vHix9D9+/dHWFgYzM3NYWxsjA8++ADnz5+v0nomTZqE1atXw9jYuNzn5OfnY/PmzRg4cCD3u8o+C5RFkz4LqBsNBJRBbLOm+vv7Y9iwYTAyMkJsbCx8fX3RsmVLeHh4YPr06QgKCkLTpk2rNRCwfPlyBAQEwM7OrtQ9Wb29vTF37lz4+/tDV1cXgwcPBoBSp1lV55Y3t2/fxqFDh0pc0/O+VatWYdWqVQgNDcX8+fNx6dKlKm+PEIgtd1XRunVrHD16tNpzU6iaRCJBSkpKictrcnJy0KBBAzx48ICbddjHxwcKhQLnzp0DACQlJaF169ZISUkp0aZJSUnqLL9M2piv2tCkbAKFc8HMnDkTH374Ife77du383LpgDZlregOIKGhoahfvz4mTJiApUuXYu3atVi4cCEWLlyITp06wdTUFB07dsTZs2eVsl6h5/P9Pq94PhUKBZo0aYJt27ahXbt21Vp+RkZGreajEXq7VbRf17bdjI2Ncf369WrXpkltlpSUhF27dmHEiBEqzZpY+7iy7q5QkWvXruH27dtISEiAiYkJFi5ciE8++QR79uyp8HXbt2+HpaVluZeXAIVzkE2dOhUNGzbEzJkzAVTtswCpHhoIKIeYZk01MzPj/m9nZwc/Pz9cunQJHh4eCA8PR3h4OObOnYvs7Gy8efMGNjY2uH//foXLPH/+POLj47F582YoFAqkpaWhdevWiIyMRJMmTTB16lTuOraIiAhYWFiUmKH18ePHiIiIwKFDhyqtHyi8likuLg7W1tYACjv7yZMnIzExEZ9++mmJ5/r4+GD69OmIiYmBu7s79PT0kJSUxI0E1mZGYlUTU+7ExtDQEJ07d8a5c+cwevRo5Obm4vHjx9DT00N2djbq1KmDsLAwdOrUqVrXtKoT5UvcKpsLRp20JWtFdwBp2LAhAGDEiBFYuXIlAKBu3brcdclA4WC1g4NDrdanyYrnc+7cuZg1axY3WRkpH7Vb9amjzcTWx5V3dwVLS0s8evSI+7n4MfTu3btLTBg+btw4+Pr6VrqusLAwXLhwAcHBwdzvpFIpjh07xk3Y+Nlnn+Hp06c4evQodHQK57avzmeB4ho3bqxRnwXUie4aUA4xzZqakJDA/f/Fixc4e/Yst6PFxcVx//bv3w97e3tuEKCiWVMvXryIJ0+eIC4uDuHh4TAxMUFcXByaNGkCoHA2VqDwllVLlizB/PnzS7z+l19+weDBg0uNfo8dOxZHjhwptb5PP/0UiYmJXK0eHh746aef8OmnnyIvL6/ELKX//PMPkpOTYWVlBaBwRuKiU5wiIyORkJCAHj16VKnt1E1MuSvuypUr6NatG5ydnbnO/n3r169Hhw4duDs/FM28XVBQgOnTp8POzg7Ozs5wd3dHdnY2Xr58CV9fXzg5OUEqlXKT/CgTYwwLFizAgAEDkJmZCW9vb+7b/6tXr6JDhw7o1KkTIiIiAADnzp2Dt7c3kpKS4O3tDXd3dzg4OGD69Onc7MS7du2Ct7c3hgwZovbbFIk1X7UhlmyWRaFQoE+fPmjfvj0++ugjpddQEW3JWtEdQHJycgAAJ06cgLOzMwAgLS2N+9s8fvwYP/74Iz7//HMAhe+vAQEBlS5frPns1atXiVsGjho1Cu3bt4dUKoVMJitxRsG2bdtgbW0NV1fXKn/7Se1W/XajNqtem4mpj6vo7gpDhgzBX3/9haSkJDDGsHXrVu6uHlZWVjh79ix3B6Tg4GA4Ojpyr7W1tS3xGaTI3r178fTpU+6YHgBu3bpVYhDg4cOHOHLkCAwMDLjXVfRZAAAWLVqEzZs3l7mNmvRZQJ3ojIByiGnW1B9++AHHjh2Dvr4+CgoKMHv27ApPxylSm1lTfX19UVBQgNzcXIwZMwbTp0/nHisoKMC
uXbuwe/fuUq+7du0aPvvss2qtKy8vD+PGjcPbt2+hp6eHevXq4eDBg9w3NN999x3GjBkDa2trGBgYYM+ePaVmKRUKMeWuyJs3bzBo0CAcPHgQnp6eKCgoKPM62TFjxmDOnDkACs8iGT9+PO7du4ebN2/izJkzuHPnDnR0dPD27Vvu7/jhhx/i1KlT3HrKUtPR8pycHIwYMQKNGzfGkSNHoKurC29vb+5NMiwsDF5eXtDX1+f+HxYWhu3bt6NBgwY4fvw46tevj/z8fAwcOBAHDhzg3jyvXr2K6OhovHv3TuW3XSxOTPmqaEbt9x+7fPlymcsQUzaBwkwWnb7brFkznDx5Evv27UPjxo1x/fp1tV4moC1ZK7oDiKurK3R1deHg4MAdbP7vf//DsGHDoKenBz09PWzYsIH7+yQkJJSaX+d9YstnkYKCAhw5coTrDwHg+++/575IWLVqFZYtW4atW7fi9u3bWLp0KaKjo9G8eXMEBQVV2GZF20PtVr12ozarfpuJqY+r6O4KVlZWWL58Obp27QoA8PLywpQpUwAA06ZNQ2xsLJydnaGvrw9zc3Ou/0tOTsbr16+rfYvwS5cuYdOmTbC1teX+7h9++GGZXxK+7+bNm+UeU2nSZwG14meOQv6JeQZIqGHmWVVITk5mPj4+altfEaHcNUDToZy7BgQHBzNPT88yX9OqVSvurhF///036969O3NwcGDOzs4MAMvMzGSpqamsbdu2bNy4cWzXrl3s5cuXjDHGLl++zFq2bMnmzJnDjh49yrKzs5W6Le7u7uyrr74q8fucnBxmZGTE4uPjmbe3N4uNjWUPHjxg3bt3ZwkJCczIyIjl5OSwd+/esalTpzKpVMqcnJxYs2bN2IIFCxhjhTMF9+rVizFGWauO2vRr5bWzmLJZ1oz2+fn5bPHixczFxYW1adOGslZFynoPrcjq1avZ119/XeHfRGz5NDExYc7Ozqxhw4asTZs27NWrV9zj33//PXN3d2cODg6sTZs2rFOnTowxxjZu3MjGjx/PPS8+Pr7SO9RQu5XfbtrSF6ora5pKHX3cgQMHStwxQdUUCgVr3749d8ew6tLWuwbQpQEiVDRram2vh5o3bx5Gjx6tpKoq16RJk3JPoVKVUaNGYc+ePSXmLyA1U91R3+Jyc3MREBCAtWvX4vbt29xEMDk5OTA1NcXt27cxcuRI3Lt3D1KpFA8fPkTnzp25U98OHz6MDh06lLjersjs2bPh4uJS5r+i0fey9OzZE6dPn0ZaWhr3OwMDA3Tt2hUnT55EXFwcbG1t0bZtWzx9+hQhISHo0qULDAwMsH79eiQnJ+Pq1au4desWRo4cWeL6v/r169e4rbSVsvq16tKUbJZl3759OHv2LM6fP1/l+zoT9WRt3rx5SrmDiCbl09vbGzdu3MCzZ8/Qrl07bh6h8PBw/Pe//0VISAhu376N9evXl3lNNVA4QZwyULtVH7WZeKijjxs6dCi++OILlS3/fbq6uoiMjOTmE6gOrf4swPdIBF+UObpTfLRUSFDBvZCLRksdHByYl5cXu3//frWXn56ezopHqOheopW9xtfXlzVu3JiZmppW+bGwsDDm7Oxc5jIjIyPZsGHDGGOMpaSksJUrV1Z7W4qraTY0KQfZ2dmsTp067OnTp9zvevXqVeJerYmJiczQ0JBlZmZWaT3ltdubN2+Yubk5u3DhAmOs8FvK169fM8b+r83evn3L9PX1WVJSEmOs8B69RXUnJydzzy8oKGCdO3dmx44dY//73/9YTk4OY4yxt2/fMkNDQ5aamlr1hqlA0bo3bNjAOnToUOLbhG+//ZZZW1uzwMBA7ncjRoxg1tbW7JtvvmGMMTZnzhw2ffp0xlhhO1pYWJR572DKWqHqZq0qxo0bx1q0aMFcXFyYhYWF6LNZ1hkB//3vf7n71l+4cIGyxlSftbZt27KuXbuy3bt3c48Xf48qrrL9X8z5TEhIYPXr12dRUVHsr7/+Ys7Ozky
hULCcnBzm5+fHvd/HxMQwc3Nz7v7fixcvrvRbWmq38ttNG96n1Zm1imhS38kYfTaoCJ0RQESnaLT09u3bcHd3x6xZs2q9zBs3blR4P1AA0NfXx4IFCxAaGlqtxyrSvn17/PHHHwCA1NRUrFq1qlqv10bFZ8AHwM2An5iYyI2OK2sG/IYNG+LIkSNYuHAhpFIp3NzcSt3a0cTEBCtWrEDHjh3h7u5eYnKYp0+fonfv3pBKpXB0dISjoyP69euHc+fOwd3dHS4uLujSpQvWrFkDU1PTWtX6vlmzZmHSpEno2bMnN6GQt7c3Hjx4AC8vL+55PXr0wIMHD7hrAWfOnImrV6/CwcEBY8aMgY+Pj1Lr0iSqylpZ3yq9b968eYiOji5z0itAfNl839ixY5GZmQkbGxvMmDFDqesXIiFk7cGDB9i8eTNWrFiB9evXAyj5HlUdYs5nixYtMHfuXCxZsgR9+/aFjY0NbGxs4OnpWeI2dY6Ojli2bBk8PT3h6uoKQ0PDStdN7Vb9dqM2q1nWxIY+G2ghvkci+FKT0Z3Lly+zrl27ctf9Hj16lDFWctRv3bp1rH379szZ2Zm1b9+eXb58mTFWOLo6bdo0Zmtry6RSKXNzc2NZWVksOTmZ9e7dmzk6OjInJ6cS1yfVFv7/qF9BQQGbP38+69+/P3v37l2p0dKQkBBmb2/P/Txy5Ejm7u7OnJycmJ+fHzc6yhhjW7duZW3btmUuLi7sq6++KjHqh2Ijw61atWJXrlxhjDH2559/MqlUWuIbmMePH5ca2avosbCwMObg4MDGjBnDHBwcmJubG9fmxUcE+/Tpw3R0dJizszNzd3dnjDH29ddfM1tbW+bs7MycnZ1ZXFxche1WWTbEkoOvvvqKffzxx4yxwm8LAwMD2ZgxY1hYWBhjjLHJkyezpUuXssTERObl5cXc3NyYvb09mzZtGncN1s6dO5mXlxcLCAiga5BrgLIWxhirftYcHR3Z5cuXWY8ePdj06dNZ+/btWZs2bdicOXNYQUEBY6zwW9oNGzZUqZ21AWUtjDGm+qwVOXHiBGvatCkrKCgo8R5VvE3atm2r9bmsCdqfa47arnqq0l5i6Tvps0H5nw3EvN/QQEAV/6ivX79mTZs2rfC0KcYK3+SLXLlyhdnY2DDGGIuKimK2trbcgUZqairLz89n69evZ5MnTy6xnrLMmjWLC+v7/yIiIsp8DQCWlJTEAgMD2dSpU5lCoWCMlTxtKj8/n02aNKnEZCvFt2HlypVsypQpjLHC06aaNWvGnj9/zhhjbNGiRWXu7IwVHoBZWVmxq1evsg8++KDU6UU12dkBsNDQUMYYY3/88QezsbEpdZD1/mvfvHnDTE1NuY7m3bt3LCsrq8z1FqkoG2LKwcWLF5mVlRVjjLHly5ezH3/8kW3fvp0tWbKEMcaYtbU1O3/+PMvKymLp6emMscLJWGQyGfv9998ZY4VZqlu3Lrt3756oO0pVoazVLGtFevTowXr27Mlyc3PZu3fvmLu7O9u7dy9jjAYC3kdZU0/Wirx584YBYC9evCjxHlW8TSiXNUPtVnPUdtVTWXuJqe+kzwblfzYQ835Dtw+soitXrnCnEgGAjo5OmZOjRUdH45tvvsHr16+5e4hmZWXBysoKCoUCEydOhLe3N2QyGXR0dODh4YENGzbg888/R/fu3dG3b98y11/Ve+e+TyaTYeDAgfjyyy9L/L7oNlPx8fFo1KhRiclY9u3bh99++w3Z2dnIzs6GmZkZAODs2bPo168fmjdvDqDwfp4rV64sc72enp74+OOP0aVLF+zevRvt2rWrUf3FtW7dGr169QIADBs2DJMnT8bTp08rfI2JiQmsra0xevRo+Pr6QiaTwcLCosY1iCkHHTt2RFJSEp4+fYpz585hy5Yt0NPTw8cff4znz58jISEBHh4eUCgUWLBgAcLDw8EYQ3JyMhwdHbnb8XTp0gU2NjaIioqqUW2kbJS18rNW3NixY6Gvrw9
9fX2MHj0aoaGhGDlyZI1q11aUNeVnjTFW5u+Lt8kHH3xQo+0mhAiDmPpOgD4baCOaI0CJaEbVkqKjo9GkSZNKd8iakkgkldagq6uLiIgIzJo1C8nJyfDw8MDFixdVUk8RTckBzYCv+ShrpYl5pmc+UdZKqyhrkZGRaNq0KZo2bVri98Xb5OzZs5WugxCi2TSl7wTos4E2ojMCqqhLly548OABLl68CE9PTxQUFCA1NbXEyF92djZyc3NhaWkJANi0aRP32MuXL6GrqwtfX1/07t0b58+fx927d6Grq4sPPvgAw4YNQ9++fdG0aVNkZGSUmkylpqN+QUFB2LVrF3x8fHDixAk0bty4xONGRkbYvn07bGxsEB0djZSUFBgbG6Nx48bIzc3Ftm3buOf27NkTK1euRFJSEszNzbF169Zy17t582akpKTg5s2b8PDwQLdu3dC1a9cabUORuLg4hIWFwdvbGwcPHkSzZs1gYWGBR48ecc8xMTFBVlYWcnNzYWBggPT0dKSnp8PT0xOenp64c+cOoqOjudHb6hJbDry9vbFmzRp07NiRe66HhwdWr16N8ePHAwBSUlJgbm6OOnXqICkpCX/++SeGDBlSozpI1VHWqpa1PXv2YOTIkVAoFNi3bx9mz55do7q1GWVNuVm7desWZs2ahQULFpR67PHjx1ybmJubQy6X12jbCSH8E1vfWYQ+G2gPOiOgimhG1arPqBoVFYW1a9di7969aNq0Kfbs2YMxY8bg9evXAACpVIrOnTsjLS0NFhYWGDNmDPfaih5zcHDArl274OTkhJUrV+L3338vNerXqFEjjB07FlKpFO3bt8fbt28REBAAJycnSKVS5OXlYdy4cTVuS7HlgGbAFy7KWtWyZmdnh65du8LJyQmenp7cqd0AsGbNGri4uGDQoEFK3T6xoawpL2tF36ItWrQIc+bMKbWM4m0yYcKEWm49IYRPYus7i6PPBtpBwsq7kE3koqKi4O7ujuvXr8PNzY3vcoiAUDZqhtqt+qjNasfLywuzZs2q9IM+tTO1QW1VNWvVQX+TmqF2qzlqu+qh9iKAuHNAlwYQQpQqNjaW7xI0BrWVemlze2vztgsd/W2qp6i9qN2qj9queqidiNjRQAAhRCnMzMxgZGSE0aNH810K0RLnzp2r0vMom6S2qpq16qBc1pyOjg61Ww1R2xFCitBAACFEKSwtLREbG4tXr17xXYrGiI2NpQMyNaBsUtaEiHJZfUU5Xr9+PXf7MjFo0KABd/s1VUtMTERqaqpa1qVqqm436jeJ2NFAACFEaSwtLbmZcQkREsomESLKZc14enqK7lpdQghRN7prACGEEEIIIYQQokVoIIAQQgghhBBCCNEiWn9pAM0ISt5Hs+oSdaGsEXWhrBExoPwSPlDutJuY//4Sxhjjuwg+xMfHw87ODpmZmXyXQgRIR0cHBQUFfJdBtABljagLZY2IgZGREWJjY2luBaJy9FmBFBFrv6O1AwFA4Q4u9tl6b9++jXHjxuHnn39W2sQ6wcHBWLp0KU6dOoXGjRsrZZlCU3xWXXXO5qvJKGs1o8oZnMWaXcpazVDWqo+yJjxmZmaiOxgnwiXGzwrUr1WfWPsdrb40QBtm6z127BgaNmyI8ePHQ09POX/uli1bYtmyZUhISEDv3r2Vskyi+ShrRF2OHTuGBg0aUNaIylHWCNFuYvysQMdrpAhNFihycrkcffr0UdqODgBNmjRBx44dIZfLlbZMovkoa0RdgoOD0bdvX8oaUTnKGiFEbOh4jRShgQARS0xMxPXr1yGTyZS+bJlMhlOnTiEvL0/pyyaah7JG1CUxMRFRUVGUNaJylDVCiNjQ8RopjgYCROzEiROQSCTo27ev0pctk8mQlpaG8PBwpS+baB7KGlGXkJAQyhpRC8oaIURs6HiNFEcDASIml8vh4eEBMzMzpS/b1dUVzZs3p1OACADKGlEfyhpRF8oaIURsqF8jxdFAgEjl5OTg1Kl
TKjn1BwAkEgn8/PwQHByskuUTzUFZI+qSk5OD06dPU9aIylHWCCFiQ8dr5H00ECBSFy9eREZGBvz9/VW2Dn9/f9y/fx+PHj1S2TqI8FHWiLoUZU1VBzEAZY0UoqwRQsSGjtfI+2ggQKTkcjksLCwglUpVtg4fHx8YGBjQKUBajrJG1CU4OBgffPABnJ2dVbYOyhoBKGuEEPGh4zXyPhoIECm5XA4/Pz9IJBKVraN+/fro0aMH7exajrJG1EUul0Mmk1HWiMpR1gghYkPHa+R9NBAgQg8ePMCDBw9UekpjEZlMhnPnziEjI0Pl6yLCQ1kj6vLvv//i4cOHlDWicpQ1QojY0PEaKQsNBIiQXC6HoaEhevXqpfJ1yWQy5Obm4syZMypfFxEeyhpRF8oaURfKGiFEbKhfI2WhgQARksvl8PLyQr169VS+rrZt26Jdu3Z0CpCWoqwRdaGsEXWhrBFCxIb6NVIWGggQmfT0dJw/f14tp/4UkclkkMvlYIypbZ2Ef5Q1oi7p6em4cOECZY2oHGWNECI2dLxGykMDASJz+vRp5OXlqX1nf/78OW7cuKG2dRL+UdaIulDWiLpQ1gghYkP9GikPDQSIjFwuh52dHaysrNS2Tk9PTxgbG9MpQFqGskbUJTg4GLa2tpQ1onKUNUKI2NDxGikPDQSISEFBAUJCQtQ64gcABgYG8PX1pZ1di1DWiLoUZc3f31+t66WsaR/KGiFEbOh4jVSEBgJEJDo6GklJSWrf2YHCU4CuXr2Kly9fqn3dRP0oa0RdoqKi8OLFC8oaUTnKGiFEbOh4jVSEBgJERC6Xw9TUFF27dlX7uvv16wfGGE6ePKn2dRP1o6wRdaGsEXWhrBFCxIb6NVIRGggQEblcDl9fX+jr66t93ebm5mjfvj2dAqQlKGtEXShrRF0oa4QQsaF+jVSEBgJE4sWLF/jnn394OfWniEwmw8mTJ5GXl8dbDUT1KGtEXV68eIHIyEjKGlE5yhohRGzoeI1UhgYCROLEiROQSCTo168fbzX4+/vj7du3uHz5Mm81ENWjrBF1oawRdaGsEULEhvo1UhkaCBAJuVyOjh07omnTprzV4ObmhmbNmtEpQCJHWSPqEhwcTFkjakFZI4SIDR2vkcrQQIAI5OXl4dSpU7ye+gMAOjo68PPzo51dxChrRF1yc3Mpa0QtKGuEELGh4zVSFTQQIALh4eFIS0vjfWcHCq8Funv3LuLi4vguhagAZY2oS3h4ONLT0ylrROUoa4QQsaHjNVIVNBAgAnK5HM2bN4erqyvfpaB3797Q19enkT+RoqwRdaGsEXWhrBFCxIb6NVIVNBAgAsHBwfDz84NEIuG7FJiYmMDT0xPBwcF8l0JUgLJG1EUul1PWiFpQ1gghYkPHa6QqaCBAwz169Aj3798XxKk/RWQyGcLCwvDu3Tu+SyFKRFkj6kJZI+pCWSOEiA31a6SqaCBAw8nlchgYGMDHx4fvUjj+/v7IycnB2bNn+S6FKBFljagLZY2oC2WNECI21K+RqqKBAA0nl8vRo0cPGBsb810Kp127dmjbti1dCyQylDWiLsHBwZQ1ohaUNUKI2NDxGqkqGgjQYBkZGTh37pygTv0pIpPJIJfLwRjjuxSiBJQ1oi4ZGRk4f/48ZY2oHGWNECI2dLxGqoMGAjTYmTNnkJubK9id/dmzZ4iJieG7FKIElDWiLqGhoZQ1ohaUNUKI2NDxGqkOGgjQYHK5nDvVRmi6d++OevXq0SlAIkFZI+pCWSPqQlkjhIgN9WukOmggQEMxxiCXywU54gcAhoaG6N27N90qRAQoa0RdGGMICQmhrBGVo6wRQsSGjtdIddFAgIa6ceMGnj9/Dn9/f75LKZe/vz8iIiLw+vVrvkshtUBZI+pSlDWhHsQAlDWxoKwRQsSGjtdIddFAgIaSy+UwNjZGt27d+C6lXH5+figoKMDJkyf5LoXUAmWNqEtR1jw9PfkupVyUNXGgrBFCxIaO10h
10UCAhpLL5fD19YWBgQHfpZSrefPmcHNzo2uBNBxljahLcHAwZY2oBWWNECI2dLxGqosGAjTQy5cvcfXqVUGf0lhEJpPh5MmTUCgUfJdCaoCyRtTl5cuX+OeffyhrROUoa4QQsaHjNVITNBCggU6ePAnGGPr168d3KZWSyWRISUlBREQE36WQGqCsEXU5ceIEZY2oBWWNECI2dLxGaoIGAjRQcHAw2rdvD3Nzc75LqVSHDh3QpEkTmiFUQ1HWiLrI5XLKGlELyhohRGzoeI3UBA0EaJi8vDz8/fffgp4RtDgdHR34+fnRtUAaiLJG1KUoa5pwSiNAWdNklDVCiNjQ8RqpKRoI0DCXL1/G27dvNeYgBig8Bej27duIj4/nuxRSDZQ1oi5FWdOUgxiAsqapKGuEELGh4zVSUzQQoGHkcjmaNWsGNzc3vkupMl9fX+jp6dHIn4ahrBF1oawRdaGsEULEhvo1UlM0EKBh5HI5/Pz8oKOjOX86U1NTdOvWjXZ2DUNZI+pCWSPqQlkjhIgN9WukpjQnMQSPHz/G3bt3NerUnyIymQxnzpxBZmYm36WQKqCsEXWhrBF1oawRQsSG+jVSGzQQoEHkcjn09fXRu3dvvkupNplMhuzsbISFhfFdCqkCyhpRF8oaURfKGiFEbKhfI7VBAwEaRC6Xw9PTEyYmJnyXUm22trb48MMP6RQgDUFZI+pCWSPqQlkjhIgN9WukNmggQEO8e/cOYWFhGjXTcXESiQT+/v6Qy+VgjPFdDqkAZY2oC2WNqAtljRAiNtSvkdqigQANcfbsWeTk5GjkNUBFZDIZ4uPjcefOHb5LIRWgrBF1oawRdaGsEULEhvo1Uls0EKAh5HI52rZti3bt2vFdSo316NEDRkZGdAqQwFHWiLpQ1oi6UNYIIWJD/RqpLRoI0ACMMcjlco0e8QOAOnXqwMfHh3Z2AaOsEXWhrBF1oawRQsSG+jWiDDQQoAFiYmLw7Nkzjd/ZgcJTgC5fvow3b97wXQopA2WNqMutW7coa0QtKGuEELGh4zWiDDQQoAGCg4NRr149dO/ene9Sas3Pzw/5+fn4+++/+S6FlIGyRtRFLpdT1ohaUNYIIWJDx2tEGWggQAPI5XL4+vrC0NCQ71JqzcLCAi4uLnQKkEBR1ogqZWdnw8bGBrdv36asEZWirBFCxIz6NaIMNBAgcK9fv0ZERIQoTv0pIpPJcPLkSeTn5/NdCimGskZUTU9PD0+ePMHx48cRERGBvn37YtGiRbhw4QLfpdUaZU1YKGuEELGi4zWiLDQQIHAnT55EQUEB/Pz8+C5FaWQyGV6/fo2rV6/yXQophrJGVE1PTw8ODg74+++/UVBQgLCwMKxZs4bvspSCsiYslDVCiFjR8RpRFhoIEDi5XA43Nzc0b96c71KUpmPHjjAzM6NTgASGskbUQSqVIiYmBk2bNsX+/fvxyy+/iOIaR8qa8FDWCCFiRMdrRFloIEDAFAoFTp48KapTfwBAV1cXffv2pZ1dQChrRF0cHBzw5s0bJCcnY+PGjRg7dizfJSkFZU14KGuEELGh4zWiTDQQIEAZGRnIycnBlStXkJKSIrqdHSg8BejmzZt4+vQp3r17h+zsbL5L0kqUNaJuFhYWAIDJkyfjs88+47ka5aKsCQtljRAiFnS8RlRBj+8CSGlDhgyBi4sLJBIJmjRpgg4dOvBdktL16dMHurq6CAkJwZkzZ9CwYUNs27aN77K0DmWNqFtgYCAaNmwIX19fvktROsqasFDWCCFiQcdrRBXojAABaty4MS5fvgy5XI5+/frhiy++wKeffsp3WUozcOBA7N27F127doVcLsfly5dhZmbGd1laibJG1E0ikaBPnz6QSCR8l6JUlDXhoawRQsSCjteIKtBAgABJpVLcvHkTt2/fRmZmJlauXCmqkT9nZ2fMmDEDTZs2RWhoKBISEiCVSvkuSytR1ghRDsoaURfKGiHah47XiCrQpQECJJVKkZ6
eDolEgoMHD2LlypWYOHEi32UpzfLly/HixQts374dBQUFAEA7O08oazUTHx+PV69e1Xo5RDwGDhyIO3fu4ODBg9zvUlJSEBUVxWNVRIyKsnbo0CEwxgAU3i5RmVkzMzODpaWl0pZHCKkdOl4jqiBhRe8iRDCePXuGli1bAgDmzZuH7777TnSnNubn52PkyJE4cOAAdHV1kZ2dDT09GpdSN8pa9cXHx8POzg6ZmZlKrJKIhY6ODncQQ4imMjIyQmxsLA0GECIQdLxGVIEGAgSIMYa6devCw8MDYWFhotvRi+Tm5sLR0RFv377Fixcv+C5HK1HWqi8qKgru7u7Ys2cP7OzslFQhEYPY2FiMHj2askHUIi8vD8OGDUNGRgZOnz6ttOUW5fj69etwc3NT2nIJITVHx2tEFWiYRYAkEgnS0tJgYGDAdykqZWBggHv37iE/P5/vUrQWZa3m7Ozs6CCZlImyQdTl8ePHyM/Ph76+Pt+lEEJUiI7XiCrQQIBAiX1HL6KjowMdHZqzkk+UNUII0UzUrxGiPeh4jSgbtTIhhBBCCCGEEKJFSp0RQLNhC1NiYiJSU1P5LkOpXr16hfT0dJUs29jYWJT3H23QoAEAiC4LmuTx48d8l6A29H5QPbGxsXyXoLEoa8JRlGPKc/XQnRZqT9v7ATEe61dVgwYN0Lx5c77LEL33+6kSAwE0G7ZwiXEmajFukzpQuxF1oPcDUh0SiQQpKSncYGV1UNaEafTo0XyXoFHoTgu1Q/0AHd8R1Xu/nyoxEPDq1StkZmbSjMcCI8aZqMW4TepA7ca/or+BUCgUCpXcXofeD6pPaNnQFJQ1oumK9v1Xr17RQEANaXs/QMd3RNXK6qfKPHqkGY+FSYx/FzFukzpQu2k3iUSCJUuWICQkBF5eXpg3bx4++eQTPHjwAIwxzJgxA1OmTMGpU6ewdu1anDp1CmlpaWjcuDF++OEHTJ48Gbt378a5c+fwyy+/YMWKFdi7dy8MDQ0BAMeOHePWRVkj1cEYw8KFCxEbG4v9+/fDyMio3OdmZWUhODgYW7ZsAUBZI4RQP6Dt20/Ui+4aQAghGkhXVxeRkZEAgMDAQNjY2ODw4cNITk6Gu7s7nJ2d4enpieHDhyMnJwdhYWHo0KEDQkNDMXnyZJw+fRr9+vVDSkoK1q5di8TERNStWxeZmZnQ0dHB69eved5ComlycnIwYsQING7cGEeOHIGurm6p5+Tn5yM0NBT79u3DhQsX0Lt3bwQGBuLcuXPqL5gQQgjRYnTXAEII0UATJ07k/h8aGoopU6YAAJo2bYqAgACEhoaibt26cHFxwaVLlxAaGoqFCxciKioKBQUFOHv2LHr27AkTExNYW1tj9OjR2LZtG968eYM6derwtVlEg8lkMjg4OOCHH34ocxAAANzd3TFu3Dj4+fnh33//xU8//YSOHTuquVJCCCGE0EAAIYRooPr165f7mEQi4f7v4+OD0NBQXLhwAb169YKTkxP27NmDhg0bwtzcHLq6uoiIiMCsWbOQnJwMDw8PXLx4UR2bQESmZ8+eOH36NNLS0sp9zvbt2zF8+HAEBQVh1KhROHLkCHJzc9VYJSGEEEIAGggowc/PD/fv36/0eUuWLMHevXuVtt6rV6/C2dkZ7dq1Q8+ePZGQkKC0ZasaX22m6ajdqk8o+2dycrLSlq0sPj4++PnnnwEAL1++xOHDh9G7d2/usX379qFBgwaoV68efHx8sGTJEvj4+AAA0tPT8eLFC3h6euLLL79Et27dEB0dzdu2CAHtnzUTFBSEgIAA+Pj4lHtpSfv27fH999/jwYMHmDx5Mo4fP47BgweruVLhEEq/Vt5xx/79++Hi4gJHR0c4Ojpi3bp13GPnzp3jzjoq+peVlVXi9Ywx9OzZs0Z3kyBECOj9oPqE3q8VycrKgr29PVxcXEr8PiYmBl5eXrCzs4OdnR0OHz4MACgoKMCcOXNgb28PqVQKb29vPHz4UGn184IVc/3
6dQaAXb9+nRH1yM/PZ23atGFnz55ljDG2Zs0a9tFHH5V4jhj/LmLcJnWgdlOvsvbPXr168f43AMBSUlK4n5OSktjgwYOZo6Mjc3BwYFu3buUey8/PZw0aNGDLly9njDF2//59BoD99ddfjDHGnj59yjp16sQcHR2Zk5MTCwgIYKmpqZS1GtDmNiueyZ9++olJpVKWmJhYpddeuXJFa9uND1U57igSHh7O/R1TU1NZmzZtWFhYGGOMsbCwMObs7FzhutatW8f+85//MFNTU2WVL0javO8ri7a3obZvf21Vp18rMmPGDPaf//ynRD/27t079uGHH7KLFy8yxhhTKBQsOTmZMcbYkSNHWMeOHVlubi5jjLGvv/6aDR06VAVboxplZUzrzgg4duwY7Ozs4OzsjAULFsDMzAxxcXEAgNatW+PGjRsAAC8vL8ydOxeenp5o06YNPvnkE24Z48ePx/fff6+Ueq5fvw49PT14e3sDAKZMmYLjx48jOztbKctXBqG1maagdqs+obVZWfvnhQsXlLLs2mCMlfiGrVmzZjh8+DBiYmJw+/Ztbr4AoPC+xCkpKViyZAkAoF27dmCMoX///gAACwsLREREICYmBrdu3cKhQ4dgamqq1u3hg9CypumKZ3LSpEm4efMmzM3Nq/RaAwMDFVbGP6FlrTrHHV27duX+jqamprC1teVqr8ydO3dw9OhRLFy4UCl1E6IqQttHNYHQ2qy6n6dCQ0ORkJCAUaNGlfj9vn374OHhgW7dugEonJi5SZMmAAovu8zJyUF2djYYY0hLS4OFhYVS6ueLVt01IDk5GRMnTsSlS5dga2uLnTt3Vjgz9qNHjxAWFoa8vDzY29vjypUr6Ny5c4XrCAwMLPd0mOPHj6Nly5YlfhcfH49WrVpxPxsbG8PExATPnz+HlZVVNbZONdTRZmJE7VZ9mrJ/1qtXD6mpqVXfMCI4tH8SddGUfq0qxx13797FlStXsHXr1hL1urm5QVdXFxMmTMDUqVMBAHl5eZg0aRJ27NhR7sSRhAiBEPdRoRNim1WnX0tNTcX8+fNx8uRJ3L17t8Rjd+/ehaGhIfz9/fHs2TNIpVKsW7cOTZo0Qf/+/REWFgZzc3MYGxvjgw8+wPnz5yvcDqHTqoGAiIgISKVS2NraAgDGjRtXYmTqfYGBgdDT04Oenh5cXFzw6NGjSoP7xx9/KLVmvqmjzcSI2q36aP8k6kL7J1EXsfRrz549w8CBA7F161buGzA3Nzc8e/YMpqamePbsGfz8/GBmZoZhw4Zh+fLlCAgIgJ2dXZXPICCED2LZR9VJ09ts+vTpCAoKQtOmTUsNBCgUCoSGhiIiIgItWrRAUFAQPv30Uxw8eBDXrl3D7du3kZCQABMTEyxcuBCffPIJ9uzZo7JaVU2rBgKqq/gttHR1daFQKCp9TXVHsCwtLfHkyRPu5/T0dLx9+xYtWrSoYdX8qkmbEWq3muBr/8zIyKhhxfzw8/PDhg0bYGNjU+HzlixZAhsbm1KnyRHaP2uiOrkT+6UB1SHE447nz5/Dx8cHX3zxBYYOHcr93sTEhPu/hYUFRowYgYsXL2LYsGE4f/484uPjsXnzZigUCqSlpaF169aIjIzkTrMlRBOpYx8VG6H1a+Hh4QgPD8fcuXORnZ2NN2/ewMbGBvfv34elpSW8vb3xwQcfAABGjx6NPn36AAB2795dYvLTcePGwdfXt9JtETKtGgjw8PDArVu3cP/+fdjY2GDPnj1Kv21RdUew3N3dkZeXh7CwMHh7e2Pbtm3o37+/YO7jrY42EyNqt+rTlP2ze/fuOHv2rFLrUqWQkJAqPe+rr75ScSXCQfun6lUnd1FRUfjyyy9VXBE/NKVfK++4IzExEb169cKCBQswbty4Uo81a9YMOjo6SE9PR3BwMD7++GMAKHEL0ri4OLi4uNCZAUSQhLiPCp0Q26w6/VrxvujcuXOYNWsWN6fBsGHDsGPHDqSlpcHExAQhISFwdnY
GAFhZWSEkJARz586FgYEBgoOD4ejoWONtFAKtmiywadOm2L59OwYNGgQXFxfExMSgfv36vN7WRkdHB3v27MHMmTPRrl07BAcHY8OGDdzjfn5+pU5bUSchtpkmoHarPiG2WVn75+eff85bPeUR2qQ9QifErGkiZeVOzLfcEmLWqnLcce3aNQCFZ2zEx8dj48aN3C0Cd+7cCQA4dOgQnJyc4OzsDA8PD/Tu3RsTJkzgZZsIqSkh7qNCJ8Q2q06/VhFLS0sEBQWhS5cukEqlOHv2LDcvyrRp0/Dhhx/C2dkZUqkUZ86cwY8//qiybVKLym4rIDZpaWnc/48cOcJsbW15rKZq+P67qKLN+N4mdaB2qz5N2D+F9jd48eIFa9SoEYuNjWWMMfbLL78wAOzx48eMMcZatWrFoqOjGWOM9ejRgw0aNIjl5eWxzMxM1rp1a3b58mXGGGPjxo1jGzZs4JYrtO1UNto/a0eZuZszZ46o200T+jVSO9q076sKn20ohH1U0zIkhDYj1VNWxrTq0gAA2LRpE/744w/k5+fDxMRE1N9EKAu1Wc1Qu1UftVn10cR3NUNZqx3KXdVR1ggRNtpHq4/aTBy0biAgKCgIQUFBfJehUajNaobarfqozVSPJr4rRFlTL23OHWWNEGGjfbT6qM3EQavmCFClZcuWYdasWXyXQQgph1j30eKT9gCgie+IWlDuhIHvfu3cuXOoW7cuN3+Ai4sLsrKyAAAFBQWYO3cuHB0dYWtri48//rhERuLj49G/f3/Y2NjA3t4emzZt4mszCFEJvvdPTcV3u125coXrzxwcHDBlyhTk5OQAKJxo0MvLC6ampnBxcSn12h07dsDa2hpt2rTBpEmTkJeXp+bqq4cGAgghRIMJcdIeIn6UO1LExsYGN27c4P7VrVsXQOEBcVRUFKKiohAbGwsdHR1s3LgRAMAYw+DBgzF27Fjcv38fd+/exbBhw/jcDEIIAQA4OzsjMjISN27cQExMDJKTk7FlyxYAhbdNXbFiBfbt21fqdY8fP8aXX36Jixcv4uHDh3jx4gV++ukndZdfLaIaCMjKykJgYCDs7e3h7OzM3dsxKSkJ3t7ecHd3h4ODA6ZPn46CggIAwK5du+Dj44MRI0bA3t4eXbp0wd27dzF48GDY2dnB19eXu2/4smXLMGTIEPTs2RO2trbo378/Xr9+XWYta9euRceOHeHm5oa+ffty97Y8fvw4pFIpXFxc4OjoiGPHjqmhZSrGR7ulpqbytblKIaSsaRIhtZsm7aOV8fHxQWxsLG7cuIGuXbvCwsKC+0BWdOsuoPDbu0GDBnGvO3jwIMaPHw+gsJ3F9M0F9Wuqp6zcjRo1Sr2FKxn1a2W7efMmfHx8YGBgAIlEgn79+uG3334DAJw5cwaGhoYYOnQo9/xmzZqpvCaifTRl/wwMDFRDa1SdprSbKvo1IyMj6OvrAwByc3ORlZUFiUQCAGjUqBG6deuGevXqlXrdwYMHMWDAAJibm0MikeCTTz7B77//rpSaVKay2QQ1yeHDh5mvry/38+vXrxljjGVlZbH09HTGGGMKhYLJZDL2+++/M8YY27lzJzMxMWFPnjxhjDE2evRoZmVlxZKSkhhjjMlkMrZ582bGGGNLly5lTZo0YYmJiYwxxj799FM2adIk7rGZM2cyxhjbu3cv+89//sMUCgVjjLHdu3czPz8/xhhjUqmUmy05Pz+fpaSklNqOtLQ05uzszP2ztrZW6d+Fj3YbPHgwZa0GWaN9lP99VIh/g2+++YZJpVLm4ODAOnfurJTahLid1UH9muopK3eUNf77NcZKH3sU/1d8+4oLCwtj9evXZ66urqx9+/bshx9+4B775ZdfWKdOndjbt29Zbm4uCwwMZMbGxowxxjZu3MgGDBjAAgMDmYuLCxs0aBB79OhRldtcaDQ9w0KgqjbUlP1z586dgsqQprSbKvo1xhh7/Pgxk0qlrF69emzYsGEsJyenxONhYWHM2dm
5xO+mT5/Ovv32W+7nO3fusJYtW5a7DnUT/V0DnJ2dERsbi6lTp6JHjx7w8/MDUHid2oIFCxAeHg7GGJKTk+Ho6Ijhw4cDADp37gxLS0sAQPv27ZGXl8eNTHfo0AEPHjzg1iGTyWBubg4AmDx5MgICAkrVcfToUURGRsLd3R0AkJ+fzz3Wq1cvzJw5Ex999BF8fX3LvL7E2NiYu/8yAERFRXHLUgU+2k0mk6lse9SBr6xVNCu3JhDLPio0NGlPadSvqR7lrpBY+rX3jz2qws3NDc+ePYOpqSmePXsGPz8/mJmZYdiwYRg/fjyePHmCHj16oG7duvDx8cGpU6cAAAqFAmfPnkVERAQcHBywdetWDBs2rEr3+SakOjRl/1yzZo0Ktr7mNKXdVNGvAUDr1q1x8+ZNZGRkYPTo0Th8+DC3jWIiqksDrKyscPfuXfTt2xeXLl2Co6MjUlJSsH79eiQnJ+Pq1au4desWRo4ciezsbO51789mXJ3ZjYtOFSmOMYZFixZx18vFxMQgJiYGALB+/Xrs3LkTRkZGGDduHFavXl3q9enp6SUm3lF18ITSbpqE2qxmhNJutd1HifAJJWtE/ISSNWUfexT/16dPnzLrMDExgampKQDAwsICI0aMwMWLF7kaly1bhujoaFy+fBn29vZwcHAAAFhaWsLV1ZX7ecyYMYiKihL8xFpE82jK/rls2TIlbK3yaEq7qaJfK65+/foYPnx4lW6PaGlpyV22ABReIlc0KCJUohoIePbsGSQSCQYMGIC1a9eCMYanT58iJSUF5ubmqFOnDpKSkvDnn3/WeB0hISF48eIFAGD79u3w8fEp9ZxBgwZh69atePPmDQAgLy8P0dHRAIB79+5x19R8+umniIiIKPX6otGron/79++vcb1VwUe7derUSVnl80IoWdM0Qmm32u6jRPioXyPqIpZ+7f1jj+L//v777zLrSkxM5K4PTk9PR3BwMFxdXQEA2dnZSElJAQC8evUKq1atwvz58wEA/fr1w7Nnz5CQkMBtn52dHXddLiHKoin7Z5s2bWq8flXQlHZTRb/28OFDblAyNzcXR44cgVQqrXR7hgwZgr/++gtJSUlgjGHr1q2CP4tAVJcGxMTEYNGiRWCMQaFQYMyYMZBKpdxpIw4ODmjRokWtPlB5enpi5MiRSEhIgLW1NXbt2lXqOaNGjcLr16/h7e0NoPAUuIkTJ8LV1RVBQUG4f/8+DAwMYGRkhB9//LHGtSgLH+02e/ZsHD58WIlboV58Za34SKMmon2UqAv1a0RdtLlfO3ToEH788Ufo6elBoVBg6NChmDBhAgDg7du38PLygo6ODgoKCjBz5kz0798fAFCvXj1s3boVMpkMjDGYmpqq/EsPop00Zf+syenrqqQp7aaKfu3s2bP473//y53B0KtXL3z55ZcAgMzMTLRr1w45OTl4+/YtLCwsMGbMGKxcuRJWVlZYvnw5unbtCgDw8vLClClTlFKTylQ2iQD5P8Unr1AnTf+7lNVumr5NqlZe1qjdKqaOfVTdf4PMzEw2bNgwZmdnx6RSKevduzdjjLHExETm5eXF3NzcmL29PZs2bRrLz89njBVO2NOrVy82fPhwZmdnxzp37szu3LnDBg0axGxtbVnv3r25yX6WLl3KAgICmLe3N7OxsWH+/v7s1atXlLVKiLFfo6wJE1/HHqTqKMO1p6ltqKz9U1O3v6aoX1O/sjImqksDCCFEbE6ePInU1FTcvXsXN2/e5L41a9CgAY4fP47r16/j1q1biIuLw4EDB7jXRUZG4rvvvsPdu3fRpk0b9O/fH1u3bkVsbCwMDAzw66+/cs+9ePEi9u3bh3v37qFly5ZYtGiR2reT8I+yRgghhGgPUV0aoGpCm8hDU1C7VR+1Wc2Isd3oDhXCRFmjrKmLGLNGiFjQ/lkz1G7CQGcEEEKIgAll5l4ifpQ1QgghRHvQQAAhhAiYUGbuJeJHWSOEEEK0h8YNBCxbtgyzZs3
iuwxIJBI4OTkhJCSkxO+zsrJgb28PFxcX7ndnz55Fx44duXvozp8/n7vdTkUeP34Md3d3pd96QihtqMmoDatGKO1Unf21InFxcZg0aZIKKixfTEwMunbtCmdnZ7i6upaYuffq1atwcHDAmDFjlDJzr62tLZ48eYJvv/1WiVugfkLJnaahrFWNUPJVVr92/vx5dOjQAQ4ODrC3t8eVK1cAAN98802J+2ebmJhgzpw5la4jLi4OXl5eMDU1LdVPFhQUYO7cuXB0dIStrS0+/vhj5ObmVmmZurq6Jep59OgRACAjIwN9+vSBmZkZGjRoUOq1wcHBsLW1hbW1NQICApCWlgYAePToEVxcXGBgYCC42dcJv4S6v0ZGRqJLly4wMjLCoEGD+C0OmtlO7969w4QJE+Dk5ARbW1ssXLgQjLESz2GMoWfPnmX2J2WpqA+qrH+Kj49H//79YWNjA3t7e2zatKlK6+SzX6M5Amrh4sWLpYKwYMECdO3aFZGRkdzvGjZsiP3798PKygrZ2dnw8fHB7t27MX78+AqX36JFC4SHhyM2Nhbu7u4q2AJCtEdV99eKmJiYYOrUqfjPf/6jggrL1q9fP/Tr16/U7y0tLfHPP/+U+Zrx48eX6F+mT59e4vEvvviixM8WFhY4ePBgid9p+q0qSfVR1jRP8X7t+fPnGDduHE6cOAE7Ozvk5OQgKysLALB48WIsXrwYAJCTk4MWLVpg1KhRlS7fxMQEK1aswNu3b7nXF9mxYweioqIQFRUFfX19TJ48GRs3bsS8efMqXW7Rvb3fp6+vjwULFqBRo0bw8vIq8VhGRgY+/vhjnD9/Hra2tpg+fTq+/vprrFmzBm3atMGNGzfQunXrStdNCF+K76/NmzfH999/j+joaJw4cYLfwgSmqu307bffIj8/H7du3YJCocCAAQNw8OBBDB06lHvOhg0b0KZNG0RFRVVp3RX1QRU9xhjD4MGDsXDhQm79RWe/VYTvfo23MwK++eabEgcMGRkZaNSoEV6+fImYmBh069YNbm5usLe3x4oVK8pcxq5du0qMDgUHB5f4w/z222/o1KkT3Nzc0L17d9y8eVNVmwMACA0NRUJCQqk3V1dXV1hZWQEovJbSxcUFcXFxlS7P0NAQdevWLfdxMbahulEbVo0Y26m8/bUijRo1gqurqwqrIsWJMXdEOMSWry1btmDkyJGws7MDUHgMUda3VkePHkXLli2r9AVDo0aN0K1bN9SrV6/UYzdv3oSPjw8MDAwgkUjQr18//Pbbb7XaBkNDw3K/vTtx4gRcXV1ha2sLAJg6dSp+//33Wq2PaA6x7a8WFhbo2LEjDA0NlbpcbWqnmzdvom/fvpBIJNDX10fv3r1L9EF37tzB0aNHsXDhwiqvr6I+qKLHzpw5A0NDwxKDEEWT5laE736NtzMCxo4dC3d3d6xbtw6Ghob4888/4e3tjSZNmqBOnTpcg2ZlZaFLly7w8fGBh4dHlZd/6dIl/P7777hw4QIMDQ1x8eJFjBw5Enfu3Cn13NmzZyMsLKzM5Wzbtg2dOnWqdH2pqamYP38+Tp48ibt375b7vKSkJBw8eBDBwcFV2o7c3NxyLw0QWxvygdqwasTWTlXdX7WBkGfuFVvutJ3Qsia2fN29exetWrWCj48PXr16BU9PT6xatarUh/gdO3bg448/rvJ2lMfd3R3btm3D9OnTUbduXRw4cKBKX3IAhaf0dujQAfn5+Rg0aBAWL14MXV3dCl8THx+PVq1acT+3bt0aiYmJUCgU0NOjE1zFTmz7q6poUzu5u7vjzz//xJAhQ5CXl4ejR48iNTUVAJCXl4dJkyZhx44dlfYtynD37l00adIEw4cPx/3799G6dWusW7eO+yK4PHz3a7z1nC1btoSrqyv++usvDB06FLt27eJOJ8vKysLUqVNx48YN6Ojo4OnTp7hx40a1gnrs2DHcvHmzRMjevHmDrKysUt+yb9iwodbbM336dAQFBaFp06b
lfrBIS0tD//79MX/+fLRv375KyzUwMMD+/fvLHLkXWxvygdqwasTWTlXZXwn/xJY7Iixiy5dCocCFCxcQGhqK+vXrY8KECVi6dCnWrl3LPefJkycIDw/H/v37a72+8ePH48mTJ+jRowfq1q0LHx8fnDp1qtLXNW/eHAkJCWjatCnevHmDwMBArFu3DvPnz691TUS8xLa/qoo2tdPChQuxcOFCdOrUCaampujYsSPOnj0LAFi+fDkCAgJgZ2dX5QHK2lAoFDh79iwiIiLg4OCArVu3YtiwYbh27ZrK110bvA6hTpw4ETt37oS7uzsePnyIvn37AgCCgoJgZmaG6Oho6OnpISAgoMStioro6ekhPz+f+7n4cxhjGDduXJUmIlLGiFV4eDjCw8Mxd+5cZGdn482bN7CxscH9+/cBAOnp6ejbty8GDhxYpQl6qkpMbcgXasOqEVM7Vba/EuEQU+6I8IgpX5aWlnBxcUHDhg0BACNGjMDKlStLPGfnzp0YOHAgGjVqVOnyKiORSLBs2TLuTI/9+/fDwcGh0tcZGhqiadOmAAovPZg4cSL27dtX6UCApaUlTp8+zf0cFxeH5s2b09kAWkRM+6sqaUs71a1bFxs3buR+XrVqFdcHnT9/HvHx8di8eTMUCgXS0tLQunVrREZGokmTJrVab1ksLS3h6urKrX/MmDGYOnUq8vLyoK+vX+Hr+OzXeL1rwKBBgxAZGYmVK1di9OjR3EanpKTAwsICenp6uH//fokGKq5t27a4desWsrKyoFAosG/fPu6xAQMGYM+ePYiPjwdQOLtteaMyGzZswI0bN8r8V9WQxsXFcf/2798Pe3t77kNFRkYG+vbti759+5aaOAkAbG1tkZCQUOr3T548QWZmZoXrFVMb8oXasGrE1E4V7a8JCQnctVrqItTZeoVATLkTIqFkjy9iytfIkSMRFhaGnJwcAIXXnjo7O3OPFxQUYOfOnaUuC6hpn5ednY2UlBQAwKtXr7Bq1aoSH+bLO7ZJTk5GXl4egMKJCw8fPlyluVf69u2LqKgo3Lt3D0DhnAjKvqsSETYx7a+qpC3tlJaWxn1Oevz4MX788Ud8/vnnAAonHHzy5Ani4uIQHh4OExMTxMXFcYMA5fVPNdWvXz88e/aMW2ZISAjs7Oy4QYDy1sd3v8brMKqhoSGGDRuGLVu2IDY2lvv9F198gTFjxuDXX39FmzZt0LNnzzJf7+HhAT8/Pzg6OqJ58+bo2rUrrl69CqDwFkWrV6/G4MGDoVAokJubC5lMVuVT8pVp48aN+Oeff/Du3TscPnwYADB06FAsXrwYycnJeP36dZmj87du3cLixYu5WX/Loi1tqErUhlWjLe2UkJBQ7khsZmYmN7IuVkWz9VZ1hl1V05bcEX6IKV9dunTBgAED4OrqCl1dXe701CKhoaHQ0dFBr169Sryusj6vXbt2yMnJwdu3b2FhYYExY8Zg5cqVePv2Lby8vKCjo4OCggLMnDkT/fv3B4AKj23Cw8OxZMkS6OrqQqFQoGfPniXuSCCVSvHy5UukpaXBwsIC3t7e+O2332BsbIzt27dj0KBBUCgUcHR0xK+//qqMpiMaQkz76/3799GrVy9kZmYiKysLly9fVtqyxdxOFhYWCAr6f+3de3RM994/8HcuEoIEDUHdQy4ymRkZiQjBJBFk3Moph7qEVusoD6rlUHXp0acXSq1apc+jS07rUvSgZZRDJSgRhNyOcMJB3NIQCSGRZDLf3x95sn8iF7lMMrf3a62slZl9+34/85k9sz+z93cvwaxZs/Cf//wH48aNg729Pezt7bFu3bpq3Q66qv0TUPk+qKppTZs2xaZNm6DRaCCEgIuLi3QJVlXbM/p+TTwnPj5eABDx8fGCqgZAZGdn13k9u3btEn/729+qnMcSXxdL7FNDYNxqpybv1y+++EL88MMPlU6v6jVYtWqVePfdd6XHubm5omXLliIzM1MkJSWJfv36iV69eglvb+8
y7/vly5eLuXPnCiGE2LJlixg1apQ0bf/+/WLgwIHS4++//14EBASIXr16ieDgYJGQkFCtflXH83FirtWcMWNmzrnHXKsdQ30Pedk+rzaq892mPnTu3FlcvHixwbfLHK47S4/hy96vlt7/6jLUfq0qDb1/quv2DLVfqyjHeGFVLbm5uWHgwIH49NNPERERUev1PH+bCSKqHzV5v1bnHtiVsabResm0MPesj6G+h9Rln1eZhv5uc+3aNWnk8KquxyUyFkO9Xy1dQ8SpofdPtd1eQ+zXDDJGQJcuXZCQkGCIVRmUjY2NdBuJUgUFBWjSpAlu374tPRcWFlbm/pgZGRlo3LhxlafkZ2RkIDExsdpJGhkZiVdffRW9evVCjx490L9//zL3ujx//jzGjx9fvY7VgKm+NtW5f3FFTLU/9ZlrdcWYvfz9aqj35/Oj9QIl9+KdNm0agJLRet966y34+voiMDAQN2/erPHr8vxovUqlEnPmzJFG632Rsa5hZL4Zh7XlHvOsYfZrCQkJL73DwC+//IL58+cDAGJiYqp1au6LcnJy8Nlnn9V4uVLjx4/HV199hTt37kCv1yMkJAQKhQIymQz+/v5ISUkBUBKTr776qtbbIdNiTvuBjIwMzJ8/HxMmTIBSqYRMJoNarca///3vOm3LnGIAlHw2ubi4QKlUSn/PjzHw4n6tLsdHK1asQOvWraFUKqFQKODv71/mEozK2lhdlS2fk5ODSZMmQSaTQS6XQyaTlRlnoar2Pj9mj7u7OxISEnDnzp1qDcRanf31i4w6WKAxODo6om/fvoiJiQEAFBYW4vr167h37540KmZ0dDT69OlT7jYYVXl+dM3KfPDBB7h48SLS0tKwYcMGrFq1CmvXrgUA9O7dGzt37qx5h8hk1VeuWTJLeH+Wjtb7n//8p9LRehMTEzFo0KBaj9b7/IHVvXv3KozF/Pnzy3zQPv9Xeq2ftbO09yhzzzSZ837tZV8sdTodRo4cWefbhNW1EPC8CRMmYM6cOUhMTERKSgr27Nkj3aWAyNjUajUSEhKQkpIClUpllYO1lsag9K+q8QXqenz0xhtvICEhAYmJiViwYAHmzp1b63VV19KlS9G6dWskJycjKSkJsbGx8Pf3r/ft1nshIDY2Fv3794dCoYBcLsfPP/9cbp61a9fC398fSqUS/v7+iI2NBVAyquTs2bPh7e0NhUIBlUqFZ8+e4f79+wgPD4evry/kcrn0C4YhCSGwaNEijBw5Enl5eVCr1dIHclxcHPz9/dGnTx+cOXMGQElFW61WIyMjA2q1GiqVCj4+Ppg9ezb0ej2AkoqWWq3G2LFj4evri7Nnz2LQoEGYM2cO/P390b17dyxYsABCiArbpFQqsX79enz++ecQQpSpor8Yk9Jb81TFXF8bS+tPfefauHHjKt02Y9Yw78/SUbIrYy2j9TLfyufb+++/b/D21oQl5h7zzDjfO6ZNm4bMzEwsW7YM0dHRUCqVmDlzJoCSX8GWL18Of39/LF68GFFRURg9erS0HZ1OhylTpkAmk0GlUkm/Vr54tkBKSgq6dOkCAJg5cyZyc3OhVCqlg4KMjAyMGzcOAQEB8PX1LXPXpdOnT0u/qk6bNg06nU6advv2bbz66qvS444dO5YpBKSmpiI0NBQeHh4YM2YMCgsLAZT8Gjdu3DiMGDECHh4eGD58OFJSUjBkyBB4eHhgwoQJ0mtBDctS9gMvCg0Nxc2bN6u1LkuNAVCyzxgyZAh69+4NHx8fTJw4EU+fPgWAl+6nAODMmTNQqVTSPmHjxo0VbufRo0fS7VVfdP78eQQFBUEulyMgIACnTp2Spmm1Wvj7+0OhUFRY0H6xj7dv30a7du1gY2MDoGRAwB49ekjzr1mzBgEBAfDz88PQoUMrzYHK5issLMQHH3wAmUwGhUKBoUOHVrq/fqmXDSJQKisrS7Rp00acOHFCCCFEcXGxyMrKKjeIQWZmprRMbGys8PT0FEIIceHCBeH
l5SWKi4uFEELk5OSI4uJisXbtWvH222+X2U5F5s2bJxQKRYV/Z86cqXAZACIjI0OMHz9ezJo1S+h0OiGEECdPnhTdunUTQgixcuVKsXHjRrF582axbNkyIYQQPXr0EMePHxf5+fkiNzdXCCGETqcTGo1G7NixQwhRMqhSkyZNxOXLl6XtDRw4UISEhIjCwkLx9OlToVKpxLZt24QQQkydOlWsW7euTPsePnwoAIg//vhDREdHC4VCIYQQ5WJy7NixKgcQMdfXhrlW81yr7D3KmDXc+/O999576YA+s2bNEgBEamqq9NyFCxeEj4+P8PX1FaNHjxYjR46Utvn8gG1CCDFz5kzRrVs30a9fP7Fw4cIyA7bt2LFD+Pn5CblcLry8vMSCBQsqbUdNoZqDBTLfKs63f/zjH0Yf7Mkcc4/7NePv1yqLyYsDSJb2ceXKldLj5+eJjo4WAMTRo0eFEELs3LlTeHp6Cr1eX2Z7QgiRnJwsOnfuLIQQ4vr168LFxaXMdsLDw0VMTIwQQoiioiIxZMgQsWvXLlFQUCA6dOggjhw5IoQQ4vDhwwKAiI6OFkIIsWbNGuHk5CRCQkLEkiVLxIULF6R1Tp06VQQEBIinT58KnU4ngoKCxPbt24UQJe+Frl27iocPHwq9Xi8GDBgg+vTpIx4/fiyKioqEQqEQBw4cEBXhQG91Zw37geffK8XFxWLGjBni448/rrL/lhgDZ2dnafnw8HCh1+vFgwcPhBBC6PV6MXPmTPHpp58KIUS19lMjR46U3sdClOznhCh5T7u6ugqFQiG6dOkiWrVqJc6dO1emjdnZ2aKgoEB07NhRHDp0SAhRsr92c3MTubm54sqVK6J169bSZ2phYaHIycmpso/Hjh0TrVq1En5+fuLdd98V+/fvl7a5bds28dZbb0nzfv/99yIiIkJqb+nncVXzrVixQowcOVI8e/ZMCPH/X/eK9tfPqyjHql0IOHDggAgODq5wxc8n4eHDh8WAAQOEj4+PUCgUAoDIy8sTOTk5onv37mLq1KkiKipK3L9/XwghxOnTp0XHjh3Fe++9J/bt2yd1yhAACJVKJb3JShUUFAgnJyeRnp4u1Gq1SE1NFWlpaWLAgAHizp07wsnJSRQUFIinT5+KWbNmCblcLnx9fYWbm5tYtGiREKIk2KGhoWXWO3DgQBEVFSU9XrdunZg2bZoQouIP5KysrAo/kF+MSWxsbJUfMOb62jDXap5rlb1HGbOGe39u2bLFYr/wVbcQwHyrON94MFA73K8Zf79WWUwqKwTcunVLevxiIaBLly5l5ndxcRE3b96sUSHgyZMnws7OrswBh7u7u/jb3/4mEhMTy22jW7duUiFACCEyMjLE9u3bxcyZM0XTpk3Fjz/+KMWk9ABDiJIDnNLRvJcvXy7eeeedMtNKYy+EEJGRkeXiWYrv/bqzhv3A8wfBLVu2FO7u7tIBsLV85la0TykuLhYffvihUCqVQiaTiY4dO4rx48cLIUS19lPr1q0T3t7eYuXKleLkyZPSel8sdB89elR07NhR5OXlSW3Mzs4WSUlJ0r6olFwuFydPnhQbNmwQkydPrlEfhSjZh2m1WvHhhx+K9u3bi1mzZgkhhHj99ddFly5dpP2aTCYTMpmsXHurmq93797it99+K7fN2hQCDDpGQGFhIcaMGYM1a9YgJSUFJ06cAFAyUI6LiwtSUlIwceJEXL58GXK5HFevXkXfvn2l0wX37NkDf3//Cq97q+01hyEhIThy5AgeP34sPefg4IB+/frh0KFDuHHjBry8vNC9e3fcunULBw8eRFBQEBwcHLB27VpkZmYiLi4OSUlJmDhxYplrJps1a/bSmJSeFlKRc+fOoU2bNuWuXXsxJpMnT37pdl7GFF8bS+uPsXONMWuY96dcLn/pes1V6Wi9Bw8erPO6mG/UEJhn5Rnie0dlMalJO55vj42NTZVjULxI/N/lDWfOnJEuK7l69WqZywNe3Mbz3NzcMGH
CBGzcuBFLly7Ftm3bpGmNGzeW/rezsytzWcGL06qal0yHuewHgP9/ffzt27fh4eGBWbNmAQAuXrxoNTF40fbt23Hs2DEcP34cycnJeP/99yvcP1TW3nnz5kGr1aJdu3ZYsmSJFNMXhYaG4tmzZ9LgoVWpaj9anT42bdoUERERWLVqFf7xj39IA7UKIbB48WJpv5acnIzk5ORy663ufHVV7dsHBgUFIS0tDSdPnkRwcDD0ej1ycnLQqlUraZ5nz56hsLAQnTp1AgB8/fXX0rT79+/Dzs4O4eHhGDx4MI4fP45Lly7Bzs4Or776KsaNG4ehQ4eiTZs2ePLkCVxcXMpsv7YD0SxZsgRRUVEICwvDr7/+ildeeQVAyRtx9erVCAgIkOYNDAzEF198gcjISAAl11W2bdsWjRs3RkZGBnbv3o2xY8dWub2tW7di4sSJ0vWVpSPpvigpKQnz5s3DokWLyk27fv16mZi4urpWuU1zfW0srT8NnWvPY8wa7v15586dWvW1Il26dMG+ffvKXDdrTBkZGQBKPgBLr2euCPOt5u/RqphaHpSysbFBdnY2WrRoIT1XUFCAFi1aIC0tDR06dABQMgK+TqeTciYjIwNdunRBdnZ2nQZZZJ4Z73tHaUycnZ3x6NGjGvX/xo0biI6Ohlqtxk8//QQ3Nzd06NABdnZ2uHnzJu7fv4/WrVuXuYOBs7Mz8vPzUVhYCAcHBzRr1gxqtRqfffaZNE7S3bt3odfr4eXlBZ1OJ23j6NGjuHbtmrSuvXv3Yvjw4WjUqBF0Oh2SkpLg7u5eoz6Q6bC0/UApJycnbN68GZ6enrh48SJyc3OtLgalsrOz4erqCmdnZ+Tm5iIqKkrqx/Mq209lZGTA09MTM2bMQMeOHbFkyZIKt5OYmIgnT55IY5OU8vT0hF6vx5EjRzB48GCcPn0aGRkZUCqVaNu2LT7++GNcvnwZXl5eKCoqQl5enhSjivr4z3/+E/7+/tJ4BPHx8dI+aPTo0fjyyy/xpz/9Ca1atUJRURFSUlLQq1evMm2qar6RI0di/fr16NevHxwdHaV9am3219U+I6Bly5bYu3cv/vrXv0Iul8PPz6/MQApAyY581apVCAgIgEqlgoODgzTt1q1bGDx4sHQbBZlMhmHDhiEmJkYa4CEoKAirV68ul4B1NW/ePMyYMQMhISHSF121Wo20tLQyt+8ZOHAg0tLSEBISAgCYO3cu4uLi4OPjg8mTJyMsLOyl2/L29ka/fv3g6+uL4OBg/PnPf5amrV69GkqlUqoALl68GO+99165dbwYk5eNKGrOr42l9achc+15jFnDvT/j4+Pr2GPzx3yr+XvUUjTknRaYZ8b73lEak9DQUBQUFEAul1d78CkfHx9ERUXB19cXn376KXbs2AEbGxu0b98eCxcuREBAAAIDA8scxLRq1QpTpkyBXC6XBgvctm0brl69CplMBl9fX4wZMwZZWVlwcHDAzp07MX/+fPj6+mL79u1QKBTSuvbs2SPdtkuhUMDR0RErV66sVtvJ9FjafuB57du3x/vvv49ly5YhKCio0vVYcgwAYMqUKcjLy4OnpyeGDRuG4ODgCuerrL0bNmyAj48PevXqhaVLl+LLL7+Ultm2bRuU/3f7wKlTp+KHH35A69aty6zXwcEBe/bswfLlyyGXyzFv3jz89NNPaNasGbp3744tW7Zg0qRJUCgU6NOnD65cuVJlH5OTkzFgwABpP/TLL79g69atAEruYhAZGQm1Wi0NPnjs2LFyfa1qvkWLFsHDwwN+fn5QKpWYOnUqANRqf13tMQLo5QYOHCj27t1r8PVa4utiiX1qCIxb7Rnq/Vmb1+D06dOiX79+0nW/+/btE0KUvbbvyy+/FL179xYKhUL07t1bnD59WghRcu3cu+++K7y8vIRcLhd+fn4iPz9fZGZmisGDBwuZTCZ8fX1FZGRknftWCoCIiYlhrtXQy3LDHPMgOztb6PV6sXDhQjFixAjx9Ol
T8fHHH4s333xTCCHEiRMnxPjx48XkyZOla7TffvttsXz5cnHv3j0xaNAg4efnJ3r27CneffddabCqLVu2iEGDBokxY8YId3d35lot1df3DqoZfjbXnbXH0Nr7T/Wvohyr9qUBRERUcw8fPsTo0aPx008/lTml70WTJ0+Wfqk7c+YMIiMjcfnyZSQmJuK3337Dv/71L9ja2uLRo0dwcHDA1q1b0bVrV/zzn/+UtlOR+fPnIzo6usJp3377baW3cyu9pRYZhrnmQUFBASZMmIBXXnkFe/fuhZ2dHdRqtfQLRHR0NAYNGoRGjRpJ/0dHR2Pz5s1o0aIF9u/fj2bNmqG4uBijRo3Crl27pF+s4+LicPHiRTx9+hQqlapG8SQiIqK6YSHAgKq6ppaIjMtY78/Y2Fh4enpKp7rZ2tqWOSW21MWLF/HJJ58gKytLut97fn4+unXrBp1Oh+nTp0OtVkOj0cDW1haBgYFYt24dFixYgAEDBmDo0KEVbr+21/bNnTu3VstRxcw1DzQaDUaNGoWPPvpIei4gIAAZGRm4desWYmJi8M0338De3h5vvvkm7t69izt37iAwMBA6nQ6LFi3C77//DiEEMjMzIZPJpEJAUFAQPD09ceHChVq1jfi9g4iIas+gdw0gIqKaM8XRfv39/eutv1QxU8wDY4+AT0RERPWDZwQQEdUjcx3td/r06fj+++9rtSyVZ655YKp3WiAiIqK64RkBRET1yJxH+yXDMec84J0WiIiILA/PCCAiqmeBgYHlDvqAkvttl1q4cCEWLlwoPf7ggw8AAH5+fhXesnDatGmYNm2a4RsLQAjB67brgTnmQakZM2ZgxowZ0uPAwMAy0wHgnXfewTvvvCM97tSpE86ePVvhuiMjI6UzB4iIiKjhVVgISE1Nbeh2UBVKXw9Lel0ssU8NgXEzPmuLvbX1ty4Yq7ph/MhcMXcNx1pjye93VN8qyi0b8VxJPz09Hd7e3sjLy2vQhtHL2draQq/XG7sZBmWJfWoIjJtpiI+Ph5+fn7GbUW/4eVB7lp4bhsZcI0vg5OSE1NRUaYwPqhnuB/j9jurfi/upMmcEdOrUCampqXjw4IFRGkeVu3fvXoX3nDZnDx48QG5ubr2su3nz5nB1da2XdRtTixYtAMDicsGcXL9+vcyt1CwVPw9qLjU1FZMmTTJ2M8wOc820lObx1q1b4e3tbezmmA1XV1cWAeqA+wHL/K5fXS1atEC7du2M3QyL9+J+qtylAZ06deKOjIioEhcuXLCKQgDAzwNqOMw10+Pt7c0zW6hBcT9A1LB41wAiIiIiIiIiK8JCABEREREREZEV4e0DiYhqgSP70os46jNZAuYvEZF1KHPXACIiqhpHNqaqcNRnsgQcAZ+IyPKxEEBEVEPp6ekWP7KxTqdDSEgIJk+ejBkzZhhknffu3cPw4cPx2WefYfDgwQZZp6l5ftRnjoJcPcw108MR8ImILB8LAUREVE50dDRCQkIQHx9v0JHDfX19oVKpEBUVZbB1knljrhERETU8DhZIRETlaLVatGvXDr169TLoejUaDX799VeePk8S5hoREVHDYyGAiIjK0Wq1iIiIgI2NjUHXq9FokJmZifPnzxt0vWS+mGtEREQNj4UAIiIq4z//+Q8uX74MjUZj8HX37dsXLVu2hFarNfi6yfww14iIiIyDhQAiIipDq9WiUaNGCAsLM/i67e3tMWTIEB6cEQDmGhERkbGwEEBERGUcOHAAAwcORPPmzetl/RqNBvHx8bh37169rJ/MB3ONiIjIOFgIICIiyZMnTxATE4Phw4fX2zaGDh0KW1tbHDx4sN62QaaPuUZERGQ8LAQQEZHkt99+Q2FhYb1cs13K1dUVgYGBPGXbyjHXiIiIjIeFACIikmi1Wnh4eKB79+71uh2NRoMjR46goKCgXrdDpou5RkREZDwsBBAREQBACIGDBw/W6y+0pTQaDZ48eYKTJ0/W+7bI9DDXiIiIjIuFACI
iAgAkJibizp07DXJwJpfL0aFDB56ybaWYa0RERMbFQgAREQEoOVW7efPmCA4Orvdt2djYICIiggdnVoq5RkREZFwsBBAREYCSW7kNHjwYDg4ODbI9jUaDtLQ0/Pvf/26Q7ZHpYK4REREZFwsBRESE+/fvIy4urkFO1S4VGhoKR0dH/lJrZZhrRERExsdCABER4dChQxBCICIiosG22bRpU6jVah6cWRnmGhERkfGxEEBERNBqtejduzfatm3boNvVaDQ4ceIEHj9+3KDbJeNhrhERERkfCwFERFZOp9Ph8OHDDXqqdimNRoOioiIcPXq0wbdNDY+5RkREZBpYCCAisnKnT59GTk6OUQ7OunbtCm9vb56ybSWYa0RERKaBhQAiIiun1Wrh5uYGlUpllO1rNBocPHgQer3eKNunhsNcIyIiMg0sBBARWbkDBw5g2LBhsLU1zkeCRqNBRkYGLly4YJTtU8NhrhEREZkGFgKIiKzYjRs3cOnSJQwfPtxobejXrx9cXFx4yraFY64RERGZDhYCiIismFarRaNGjTB48GCjtaFRo0YYMmQID84sHHONiIjIdLAQQERkxbRaLYKDg+Hs7GzUdmg0Gpw7dw5//PGHUdtB9Ye5RkREZDpYCCAislJ5eXmIjo42ygjuLxo2bBhsbGzw66+/GrspVA+Ya0RERKaFhQAiIit17NgxPHv2zCQOzlq3bo2AgACesm2hmGtERESmhYUAIiIrdeDAAbi7u8PDw8PYTQFQcsr24cOHUVhYaOymkIEx14iIiEwLCwFERFZICAGtVguNRgMbGxtjNwdAycFZbm4ufv/9d2M3hQyIuUZERGR6WAggIrJCycnJuH37tlFv5faiXr16oX379jxl28Iw14iIiEwPCwFERFZIq9WiadOmGDBggLGbIrGxsUFERAQPziwMc42IiMj0sBBARGSFtFotBg8eDEdHR2M3pQyNRoMrV67g2rVrxm4KGQhzjYiIyPSwEEBEZGWysrIQGxtrEiO4vygsLAwODg78pdZCMNeIiIhMEwsBRERW5vDhw9Dr9YiIiDB2U8pp1qwZBg4cyIMzC8FcIyIiMk0sBBARWZkDBw5Ig6WZIo1Gg5iYGDx58sTYTaE6Yq4RERGZJhYCiIisiE6nw6FDh0xqBPcXDR8+HIWFhTh69Kixm0J1wFwjIiIyXSwEEBFZkTNnziA7O9skr9ku5e7uDk9PT56ybeaYa0RERKaLhQAiIiui1WrRunVr+Pv7G7spVdJoNDh48CCEEMZuCtUSc42IiMh0sRBARGRFtFothg0bBltb0979azQa3L17FwkJCcZuCtUSc42IiMh0mfanMxERGUx6ejqSk5NN+lTtUv3790fz5s15yraZYq4RERGZNhYCiIishFarhZ2dHcLDw43dlJdycHBAeHg4Dhw4YOymUC0w14iIiEwbCwFERFZCq9UiODgYLVq0MHZTqmX48OE4e/Ys7t+/b+ymUA0x14iIiEwbCwFERFYgPz8fx44dM4tTtUsNGzYMQgj8+uuvxm4K1QBzjYiIyPSxEEBEZAWio6ORn59vVgdnbm5u8Pf357XbZoa5RkREZPpYCCAisgJarRZdu3aFl5eXsZtSIxqNBocPH0ZRUZGxm0LVxFwjIiIyfSwEEBFZOCEEtFotNBoNbGxsjN2cGtFoNHj06BFOnz5t7KZQNTDXiIiIzAMLAUREFu5f//oXbt68aVanapfy8/ODm5sbR3Q3E8w1IiIi88BCABGRhdNqtXBycsKgQYOM3ZQas7W1RUREBK/dNhPMNSIiIvPAQgARkYXTarUICwtD48aNjd2UWhk+fDhSU1Nx/fp1YzeFXoK5RkREZB5YCCAismDZ2dk4ffq0WZ6qXWrw4MFo1KgRf6k1ccw1IiIi88FCABGRBTt8+DCKi4sRERFh7KbUWvPmzTFgwAAenJk45hoREZH5YCGAiMiCabVaKBQKdOjQwdhNqRONRoPo6Gg8ffrU2E2
hSjDXiIiIzAcLAUREFqq4uBi//vqrWZ+qXUqj0aCgoADHjh0zdlOoAsw1IiIi88JCABGRhYqLi0NWVpZFHJx5eHige/fuvLWbiWKuERERmRcWAoiILJRWq8Urr7yCPn36GLspBjF8+HAcPHgQQghjN4VewFwjIiIyLywEEBFZKK1Wi2HDhsHOzs7YTTEIjUaD27dvIykpydhNoRcw14iIiMwLCwFERBbo9u3bSExMtIhTtUsNGDAAzZo144juJoa5RkREZH5YCCAiskAHDx6EnZ0dhgwZYuymGIyDgwMGDx7MgzMTw1wjIiIyPywEEBFZIK1Wi6CgILRs2dLYTTEojUaDM2fO4MGDB8ZuCv0f5hoREZH5YSGAiMhCFBUV4fHjx3j27BmOHj1qUadql4qIiIBer8ehQ4dQXFyMnJwcYzfJKjHXiIiIzBsLAUREFmLNmjUYPHgwYmJikJeXh+HDhxu7SQbXrl07qFQqaLVafPvttxYzSr25Ya4RERGZNxYCiIgsxCuvvIL4+Hj8/PPP6Ny5My5duoS+ffuiuLjY2E0ziFWrVmH69OkYNmwYDh06hN9//x2urq7GbpZVYq4RERGZNxYCiIgshFwuR3FxMX755Rf4+vrijTfeQI8ePSzmlm7+/v74/vvvcfHiReTk5CAuLg5yudzYzbJKzDUiIiLzZiOEEMZuBBER1d2TJ0/QvHlzAICjoyPCwsKwd+9eNGrUyMgtM5xt27Zh0qRJaNKkCQoKCrBhwwb85S9/MXazrA5zjYiIyLzxjAAiIgvRrFkzvPLKKwBKftHcvXu3RR2YAcAbb7yBr7/+Gvn5+dDr9fyV1kiYa0REROaNZwQQEVkQuVyOu3fv4tq1a3BxcTF2c+pNZGQkfvjhB2RnZ8PZ2dnYzbFKzDUiIiLzxUIAEZEFKS4uhhAC9vb2xm5KvSssLISDg4Oxm2G1mGtERETmi4UAIiIiIiIiIivCMQKIiIiIiIiIrIjln89HRBYlPT0dDx48MHYzDObevXvIyckxdjPMQosWLdCuXbtaLcs4Aw8ePEBubm69rLt58+ZwdXWtl3Wbi7rkJ1Wfq6srOnXqZOxmEBGZPRYCiMhspKenw9vbG3l5ecZuisHY2tpCr9cbuxkWj3FmDMgyODk5ITU1lcUAIqI6YiGAiMzGgwcPkJeXh61bt8Lb29vYzamz1NRUTJo0yWL6Y6oYZ8aALENpHj948ICFACKiOmIhgIjMjre3N/z8/IzdDIOxtP6YKsaZMSAiIqISHCyQiIiIiIiIyIqwEEBERERERERkRVgIICIiIiIiIrIiLAQQERlYREQErly58tL5Nm7c2ACtMV3VjdOyZcuwbdu2BmiR6WPMasdYcYuLi4NCoYCHhwdCQkJw586dCufbu3cv5HI5lEolevbsiQ8//BBCCADAjz/+CKVSCZlMBplMhi+//FJa7tixYwgICEDPnj3h4+ODhQsX8s4QRERULTai9JOGiMjEXbhwASqVCvHx8RYx4Jml9cdUMc6MgTHo9Xp4eHjgf//3f6FWq7FmzRrExcVh9+7d5ebNzc1F06ZNYWtri8LCQvTv3x+LFy/Ga6+9hlOnTsHd3R1t27bFo0ePoFKpsHnzZgwaNAgXL16Ei4sLunXrhmfPniEsLAxvvfUWIiMjG77DDYB5TERkODwjgIioFn7++Wd4e3tDoVBg0aJFcHV1xY0bNwAAXbp0QUJCAgBg0KBBeP/99xEcHAx3d3fMnDlTWsfy5cuN0PKGZYg4RUZG4quvvmr4xhsJY1Y7pha3+Ph42NvbQ61WAwDeeecd7N+/H8+ePSs3b/PmzWFrW/KV7NmzZygoKICNjQ0AoF+/fmjbti0AwMXFBV5eXlK/evXqhW7dugEAGjduDKVSKU0jIiKqCm8fSERUQ5mZmZg+fTpOnToFLy8vbNmyBVlZWZXOf+3aNURHR6OoqAg9e/ZEbGws+vbt24AtNo6GiNP48eMrPeV7//796NixY53
60NCYW7VjirmWnp6Ozp07S4+bN28OZ2dn3L17Vzp4f97p06fxzjvvIC0tDX/5y18watSocvNcunQJsbGx2LRpU7lpGRkZ+Omnn3DgwIEq+0FERATwjAAioho7c+YM5HI5vLy8AABTp06Fg4NDpfOPHz8e9vb2aNKkCZRKJa5du9ZQTTWqhojTzp07kZCQUOGfuRUBAOZWbVlCrgUFBSE5ORm3bt1CfHw8Tp48WWb67du3MWrUKGzatAkdOnQoM+3x48cYMWIEFi5ciN69e9e5LUREZPl4RgARUT1r3Lix9L+dnR10Op0RW2O6ahMnSzsjoKaYW7XTELnWqVMn3Lx5U3qcm5uLR48eoX379lVup3Xr1oiIiMDu3bsxYMAAAMDdu3cRFhaGpUuX4vXXXy8zf25uLoYOHYpRo0bhvffee2k/iIiIABYCiIhqLDAwEElJSbhy5Qo8PT2xdetWFBYWGrtZJqch4rRz506Drs/YmFu1Y4q5plKpUFRUhOjoaKjVanz77bcYMWJEmSJEqcuXL8PDwwO2trbIzc2FVqvFlClTAAD37t1DaGgoFi1ahKlTp5ZZ7smTJxg6dCiGDh2KpUuX1r5zRERkdXhpABFRDbVp0wabN2/G6NGjoVQqkZycjGbNmqFFixbGbppJYZxqjjGrHVOMm62tLbZu3Yq5c+fCw8MDBw4cwLp166TpEREROH/+PICSIoNMJoNCoUDfvn0RGhqKt956C0DJLQ3T09Oxfv16KJVKKJVKbNmyBQCwfv16nD17Fnv27JGmffLJJw3fWSIiMju8fSARmQ1TunVUbm4umjdvDgDYt28fFi9ejNTU1Bqtw5T6U18MEae6Mrc410fMzC0GtWEKuUb1yxrymIioofDSACKiWvj666+xc+dOFBcXw9nZGdu2bTN2k0wS41RzjFntMG5ERETVx0IAEVEtLFmyBEuWLDF2M0we41RzjFntMG5ERETVxzECiIjI7KxYsQLz5s0zdjPIChg7127cuIFBgwbBxcUFSqWy3PTvvvsOPXr0gLu7O2bMmIGioqJqLUdERNaNhQAiIiIiE+Xs7IxVq1Zh+/bt5aZdv34dH330EU6ePImrV6/ijz/+wP/8z/+8dDkiIiIWAoiIAOTn52P8+PHo2bMnFAoFwsPDAQAZGRlQq9VQqVTw8fHB7NmzodfrAQBRUVEICwvDhAkT0LNnTwQFBeHSpUt47bXX4O3tjfDwcDx58gRAya+KY8eORUhICLy8vDBixAjk5OQYq7sGYYyYZWVlVdiWNWvWICAgAH5+fhg6dKh0//b9+/dj/PjxDRCN6mOu1Zy55JpcLodSqYRMJsPPP/9skL63atUK/fv3R9OmTctN++mnnzBy5Ei0bdsWNjY2mDlzJnbs2PHS5YiIiFgIICICcOjQIeTk5ODSpUtITEzEjz/+CABo0aIF9u/fj/j4eCQlJeHGjRvYtWuXtNy5c+fw+eef49KlS3B3d8eIESOwadMmpKamwsHBAX//+9+leU+ePInt27fj8uXL6NixIzZs2NDg/TQkY8Rs8eLF5dqxfft2XLlyBbGxsbhw4QLeeOMNzJo1CwCwdOlSfPjhh/UciZphrtWcueTat99+i4SEBCQlJWHgwIHlls/NzZVu8/fi35AhQ2ocl/T0dHTu3Fl63KVLF6Snp9d4PUREZH04WCAREQCFQoHU1FTMmjULAwcOREREBABAr9dj0aJF+P333yGEQGZmJmQyGf785z8DAPr27YtOnToBAHr37o2ioiK4ubkBAPz9/ZGWliZtQ6PRoG3btgCAt99+GxqNpiG7aHDGiNmYMWPKtWPfvn04d+4cVCoVAKC4uFiaFhoaitWrV9dD72uPuVZz5pJrc+fOxZ/+9CeEh4dXeF1+8+bNkZCQUPeAEBER1RHPCCAiAtCtWzdcunQJQ4cOxalTpyCTyZCdnY21a9ciMzMTcXFxSEpKwsSJE/Hs2TNpucaNG0v/29nZlXus0+kq3aa
NjU39dKaBmErMhBBYvHgxEhISkJCQgOTkZCQnJwMA1q5dixUrVhigt4ZjKnEzJ6YSs5fl2pYtW+Dk5ISpU6fiiy++KLe8oc8I6NSpk3RpAlAyQGBp4YOIiKgqLAQQEQG4ffs2bGxsMHLkSKxZswZCCNy6dQvZ2dlo27YtGjdujIyMDOzevbvW2zh48CD++OMPAMDmzZvRp08fQzXfKIwRs7CwsHLzjB49Gps2bcLDhw8BAEVFRbh48SIA4PLly3B3d6/19usDc63mzCXXSscp+Mtf/oIzZ86UW770jICK/g4fPlzjNo8dOxa//PILMjIyIITApk2bpLMhiIiIqsJLA4iIACQnJ2Px4sUQQkCn02Hy5MmQy+XSqb4+Pj5o3759hQcH1RUcHIyJEyfizp076NGjB+bPn489e/YYsBcNyxgxi4qKKjfPG2+8gaysLKjVagCATqfD9OnT0atXLyxZssTkTsVmrtWcueTalStX4ODgACcnJ2zcuLHWbXleXl4ePDw8UFBQgEePHqFDhw6YPHkyPv30U3Tr1g0rV65Ev379AACDBg3CO++889LliIiIbIQQwtiNICKqjgsXLkClUiE+Ph5+fn7Gbk6NrFixAjk5Ofjqq6+k58y5Pw2hopjVhrXFmblWc4bKNapfzGMiIsPhpQFEREREREREVoSXBhARNQBTG7DOHDBmtcO41RxjRkRE1oZnBBARERERERFZERYCiIiIiIiIiKwICwFEZPVWrFiBefPmGbsZJs9U4mRjYwNfX18cPHgQAHDu3DkEBQXByckJo0ePLjPvuXPnEBkZ2fCNrCZTiak5MZWY1SQPf/zxRyiVSshkMshkMnz55ZfV2saWLVugVCqlP1dXV4wZM6Zay37++efo2bMnlEolAgMDcfbsWQDA3bt3MWTIEHh6ekIul2Ps2LG4f/++tJxarUarVq04cCIRkYVjIYCIiMzOyZMnERERAQBo164dvvrqK6xbt67cfO3atcP777/f0M0jK1HdPOzYsSMOHTqElJQUnDp1Chs3bkRMTMxL1z9t2jQkJCRIf23btsUbb7zx0uUSEhLwzTff4OzZs0hISMDs2bMxe/ZsAICdnR0++ugjXLlyBUlJSejWrRs++OADadno6GiMHDmymhEgIiJzxUIAEVmMTz75RPqyCwBPnjxBq1atcP/+fSQnJ6N///7w8/NDz549sWrVqgrXERUVVebXvAMHDmDQoEHS4x9++AF9+vSBn58fBgwYgMTExPrqTr2xtDh16NABAQEBcHR0rHCaTCart22XsrSYNgRLi1lVedivXz+0bdsWAODi4gIvLy/cuHGjRuuPi4tDZmZmtQ7SbWxsUFRUhKdPnwIAcnJy0KFDBwCAm5sb+vfvL83bp0+fGreFiIjMH+8aQEQWY8qUKVCpVPjyyy/h6OiI3bt3Q61Wo3Xr1mjcuDF+++03ODo6Ij8/H0FBQQgLC0NgYGC113/q1Cns2LEDJ06cgKOjI06ePImJEyfiX//6V7l558+fj+jo6ArX8+2336JPnz617mddMU6GZ+oxzcvLq3MfDc3UY1bK0Hl46dIlxMbGYtOmTTVa7rvvvsPkyZPRqFGjl86rUCgwf/58dO3aFa1atYKjoyNOnDhRbr7i4mJs2LABo0aNqlFbiIjI/LEQQEQWo2PHjujVqxd++eUXvP7664iKipJOec3Pz8esWbOQkJAAW1tb3Lp1CwkJCTU6sPj555+RmJhY5qDg4cOHyM/PR5MmTcrMW9HpwaaCcTI8U4/phQsXoFKpatm7+mHqMasPt2/fxqhRo7Bp0ybpF/rqePr0KX788UecOXOmWvNfv34de/bswdWrV9G+fXts2LAB48ePx++//y7NI4TArFmz0LJlS8ydO7fGfSEiIvPGQgARWZTp06djy5YtUKlUuHr1KoYOHQoAWLJkCVxdXXHx4kXY29tjzJgxePbsWbnl7e3tUVxcLD1+fh4hBKZOnYr//u//fmk7TP2
XbsbJ8Ew5pqZ4RgBg2jErZag8vHv3LsLCwrB06VK8/vrrNVp29+7d8PHxQc+ePas1/z/+8Q/4+vqiffv2AErGGpgzZw4KCwvh4OAAAPiv//ov3Lp1C/v27YOtLa8UJSKyNtzzE5FFGT16NM6dO4dPP/0UkyZNgr19Sb0zOzsbHTp0gL29Pa5cuYIjR45UuHz37t2RlJSE/Px86HQ6bN++XZo2cuRIbN26Fenp6QAAvV6P8+fPV7iedevWlRnk6/k/Uzi4ZZwMz5Rj+uOPPxq4t4ZhyjEzZB7eu3cPoaGhWLRoEaZOnVpm2p07d+Dl5VXl8t999x3efPPNcs97eXnhzp075Z7v1q0bTp06hSdPngAoGTvBw8OjTBHg6tWr2Lt3r/QcERFZF54RQEQWxdHREePGjcM333yD1NRU6fmlS5di8uTJ+Pvf/w53d3eEhIRUuHxgYCAiIiIgk8nQrl079OvXD3FxcQCA4OBgfPHFF3jttdeg0+lQWFgIjUaD3r17N0jfDMmS4nTlyhWEhoYiLy8P+fn56NChA5YsWYJZs2bhypUr0q/M9c2SYtpQLClmVeXhsmXLkJ6ejvXr12P9+vUAgLlz52LatGm4c+eOVACpbL0JCQnSbQpLZWZmIisrC61atSq3zGuvvYZz586hd+/ecHR0RNOmTaUiyalTp/D111/Dy8tLKnJ07doVe/fuNVQoiIjIDNgIIYSxG0FEVB2l1znHx8fDz8/P2M2pM0vrT0OxsbFBdnY2WrRoUa35GWfGoD7UNA8rs3r1arRr1w6TJk2q0XK7d+/GlStXsHTp0jptvyKRkZFQKpWYN2+ewdddF8xjIiLD4aUBRERkVtzc3DBw4MByv5ASNSRD5eEHH3xQ4yIAALz++uv1UgRQq9U4fvw4mjZtavB1ExGR6WAhgIgsWpcuXZCQkGDsZpRjY2OD3NzcWi1ryn3Kyckp81xBQQGaNGmC27dvS8+FhYWVudd7RkYGGjdujPz8/GptJyMjA4mJiYiIiKh0nsjISLz66qvo1atXrW6NZk4xBoCoqCi4uLhAqVRKf5VdD18T5hSH+si1qpTm4a5du6Rc69GjB/r3748ffvhBmu/8+fMYP358leuqzjgOv/zyC+bPnw8AiImJgVKprHGbc3Jy8Nlnn1U5T3R0NK5fv44ZM2aUm9a7d2/ExMQAAJKTkxESEgKFQgGZTAZ/f3+kpKQAKHn/ffXVVzVuHxERNRwWAoiIqN44Ojqib9++0sFDYWEhrl+/jnv37kkjvEdHR6NPnz7lbulWledHiq/MBx98gIsXL+Lnn3+uVdvNjVqtLjPInbmPH1BTppBraWlp2LBhA1atWoW1a9cCKDl43rlzZ5XLv6wQoNPpMHLkyDrf5rA6hYDqmjBhAubMmYPExESkpKRgz549aNOmjUHWTURE9Y+FACKyCLGxsejfvz8UCgXkcnmFB39r166Fv78/lEol/P39ERsbC6BkNPHZs2fD29sbCoUCKpUKz549w/379xEeHg5fX1/I5XJMmzaNfaoGIQQWLVqEkSNHIi8vD2q1Wjo4i4uLg7+/P/r06SPdEz0mJgZqtRoZGRlQq9VQqVTw8fHB7NmzodfrAZT84q1WqzF27Fj4+vri7NmzGDRoEObMmQN/f390794dCxYsQE2HvbGUGFdEp9NhyJAh6N27N/70pz9VuT5LiYMp5JpSqcT69evx+eefQwhR5tf7imKSmZmJZcuWITo6GkqlEjNnzgRQctbD8uXL4e/vj8WLFyMqKgqjR4+WtqPT6TBlyhTIZDKoVCrpzI0XzxZISUlBly5dAAAzZ85Ebm4ulEqlVCjKyMjAuHHjEBAQAF9f3zKXG5w+fRpKpRIymQzTpk2DTqeTpt2+fRuvvvqq9Lhjx45lCgGpqakIDQ2Fh4cHxowZg8LCQgDAihUrMG7cOIwYMQIeHh4YPnw4UlJSMGTIEHh4eGDChAnSa0FERPVIEBGZifj4eAFAxMfHl3k
+KytLtGnTRpw4cUIIIURxcbHIysoSQgjRuXNncfHiRSGEEJmZmdIysbGxwtPTUwghxIULF4SXl5coLi4WQgiRk5MjiouLxdq1a8Xbb79dZjsVmTdvnlAoFBX+nTlzpsJlAIh//vOfFfbHnPuUkZEhxo8fL2bNmiV0Op0QQoiTJ0+Kbt26CSGEWLlypdi4caPYvHmzWLZsmRBCiB49eojjx4+L/Px8kZubK4QQQqfTCY1GI3bs2CGEEGLLli2iSZMm4vLly9L2Bg4cKEJCQkRhYaF4+vSpUKlUYtu2bUIIIaZOnSrWrVsnhLC8vKkoxlu2bBHOzs7S8uHh4UKv14sHDx4IIYQ4f/48c03Uf66VevjwoQAg/vjjDxEdHS0UCoUQQlQaky1btohRo0aV6+PKlSulx8/PEx0dLQCIo0ePCiGE2Llzp/D09BR6vb7M9oQQIjk5WXTu3FkIIcT169eFi4tLme2Eh4eLmJgYIYQQRUVFYsiQIWLXrl2ioKBAdOjQQRw5ckQIIcThw4cFABEdHS2EEGLNmjXCyclJhISEiCVLlogLFy5I65w6daoICAgQT58+FTqdTgQFBYnt27cLIYRYvny56Nq1q3j48KHQ6/ViwIABok+fPuLx48eiqKhIKBQKceDAAVGRyt7LRERUc7x9IBGZvdjYWHh6eiI4OBgAYGtrW+EttS5evIhPPvkEWVlZ0v3J8/Pz0a1bN+h0OkyfPh1qtRoajQa2trYIDAzEunXrsGDBAgwYMKDS29DV9nTduXPnWlyfNBoNRo0ahY8++kh6LiAgABkZGbh16xZiYmLwzTffwN7eHm+++Sbu3r2LO3fuIDAwEDqdDosWLcLvv/8OIQQyMzMhk8nw5z//GQAQFBQET0/PMtubMmUKGjVqhEaNGmHSpEk4evQoJk6cWK22WlKMgZJLA/bt2yc91uv1WLduHbRabZXjUVhSHEwl10QlZwtUNyalpk+fXum0Ll26IDQ0FAAwbtw4vP3227h161aV63vR06dP8dtvv+GPP/6Qnnvy5AmuXLmCy5cvw97eHmFhYQCA8PBwdOvWTZpvwYIFmDRpEo4dO4YTJ04gODgY3333nTQewmuvvQYnJycAJa/LtWvXpGXDw8PRsmVLAICfnx8cHR3RvHlzAECvXr2QlpZWo34QEVHN8dIAIrIKhYWFGDNmDNasWYOUlBScOHECQMkAYy4uLkhJScHEiRNx+fJlyOVyXL16FX379kVCQgL69OmDPXv2wN/fv8LrhefPn19mkLbn/0rvaV4Rf39/i+tTSEgIjhw5gsePH0vPOTg4oF+/fjh06BBu3LgBLy8vdO/eHbdu3cLBgwcRFBQEBwcHrF27FpmZmYiLi0NSUhImTpwoXdsNAM2aNXtpTGxsbGoSwpcylxhXZPv27Th27BiOHz+OXbt2WUUcTCXXzp07hzZt2pS7Zr66MalJO55vj42NDezt7cus8/l+vai0YHHmzBlpbImrV69WejeCF/vs5uaGCRMmYOPGjVi6dCm2bdsmTWvcuLH0v52dXZnLCl6cVtW8RERUP3hGABGZvaCgIKSlpeHkyZMIDg6GXq9HTk5OmV81nz17hsLCQnTq1AkA8PXXX0vT7t+/Dzs7O4SHh2Pw4ME4fvw4Ll26BDs7O7z66qsYN24chg4dijZt2uDJkydwcXEps/3a/qI5ffp0fP/99xbVpyVLliAqKgphYWH49ddf8corrwAo+bV69erVCAgIkOYNDAzEF198gcjISABAdnY22rZti8aNGyMjIwO7d+/G2LFjq9ze1q1bMXHiROh0Omzfvl0aVb06LC3GL8rOzoarqyucnZ3x9OlTq4mDsXMtKSkJ8+bNw6JFi8pNu379eoUxcXZ2xqNHj2rU/xs3biA6OhpqtRo//fQT3Nzc0KFDB9jZ2eHmzZu4f/8+WrduXeYOBs7OzsjPz0dhYSEcHBzQrFkzqNVqfPbZZ1ixYgUA4O7du9Dr9fDy8oJOp5O
2cfTo0TK/6u/duxfDhw9Ho0aNoNPpkJSUBHd39xr1gYiIjIdnBBCR2WvZsiX27t2Lv/71r5DL5fDz88OpU6fKzOPs7IxVq1YhICAAKpUKDg4O0rRbt25h8ODBkMvlkMlkkMlkGDZsGGJiYqBSqaBUKhEUFITVq1eXO4hhn8qbN28eZsyYgZCQEGRkZAAoOThLS0srcyu3gQMHIi0tDSEhIQBKLpWIi4uDj48PJk+eLJ2SXBVvb2/069cPvr6+CA4Olk7tBoDVq1dDqVSWGWDteZYW4xdNmTIFeXl58PT0xJw5cypdl6XFwZi55uHhgVmzZmHx4sV47733yq2jspiEhoaioKAAcrlcGizwZXx8fBAVFQVfX198+umn2LFjB2xsbNC+fXssXLgQAQEBCAwMLFPQadWqFaZMmQK5XC4NFrht2zZcvXoVMpkMvr6+GDNmDLKysuDg4ICdO3di/vz58PX1xfbt26FQKKR17dmzBzKZDHK5HAqFAo6Ojli5cmW12k5ERMZnIyq7kI2IyMRcuHABKpUK8fHx8PPzM3Zz6szS+tPQBg0ahHnz5lV6oF+KcWYM6qq6uUb1i3lMRGQ4PCOAiIiIiIiIyIpwjAAiIjJLpfeLJ6pvzDUiIrI0PCOAiIiIiIiIyIqwEEBERERERERkRVgIICIiIiIiIrIiLAQQERERERERWREOFkhEZic1NdXYTTCI0n5YSn9MFePMGJBlYP4SERmOjRBCGLsRRETVkZ6eDm9vb+Tl5Rm7KQZja2sLvV5v7GZYPMaZMSDL4OTkhNTUVHTq1MnYTSEiMmssBBCRWUlPT8eDBw+M3QyDuXfvHnJycozdDLPQokULtGvXrlbLMs7AgwcPkJubWy/rbt68OVxdXetl3eaiLvlJ1efq6soiABGRAbAQQERERERERGRFOFggERERERERkRVhIYCIiIiIiIjIirAQQERERERERGRFWAggIiIiIiIisiIsBBARERERERFZERYCiIiIiIiIiKwICwFEREREREREVoSFACIiIiIiIiIrwkIAERERERERkRVhIYCIiIiIiIjIirAQQERERERERGRFWAggIiIiIiIisiIsBBARERERERFZERYCiIiIiIiIiKwICwFEREREREREVoSFACIiIiIiIiIrwkIAERERERERkRVhIYCIiIiIiIjIirAQQERERERERGRFWAggIiIiIiIisiIsBBARERERERFZERYCiIiIiIiIiKwICwFEREREREREVoSFACIiIiIiIiIrwkIAERERERERkRVhIYCIiIiIiIjIirAQQERERERERGRFWAggIiIiIiIisiIsBBARERERERFZERYCiIiIiIiIiKwICwFEREREREREVoSFACIiIiIiIiIrwkIAERERERERkRVhIYCIiIiIiIjIirAQQERERERERGRFWAggIiIiIiIisiIsBBARERERERFZERYCiIiIiIiIiKwICwFEREREREREVoSFACIiIiIiIiIrwkIAERERERERkRVhIYCIiIiIiIjIirAQQERERERERGRFWAggIiIiIiIisiIsBBARERERERFZkf8HZvZEWm0JOpcAAAAASUVORK5CYII=",
+      "text/plain": [
+       "<Figure size 1200x1200 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "plt.figure(figsize=(12,12))\n",
+    "viz = sklearn.tree.plot_tree(model, fontsize=8, label='all', class_names=model.classes_, feature_names=[\"k\", \"rows\", \"cols\", \"use_memory_pool\"], impurity=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "57cf8617-9dd6-4e82-aeb5-ccc860ba0d51",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "inline Algo choose_select_k_algorithm(size_t rows, size_t cols, int k)\n",
+      "{\n",
+      "  if (k > 134) {\n",
+      "    if (k > 256) {\n",
+      "      if (k > 809) {\n",
+      "        return Algo::kRadix11bits;\n",
+      "      } else {\n",
+      "        if (rows > 124) {\n",
+      "          if (cols > 63488) {\n",
+      "            return Algo::kFaissBlockSelect;\n",
+      "          } else {\n",
+      "            return Algo::kRadix11bits;\n",
+      "          }\n",
+      "        } else {\n",
+      "          return Algo::kRadix11bits;\n",
+      "        }\n",
+      "      }\n",
+      "    } else {\n",
+      "      if (cols > 678736) {\n",
+      "        return Algo::kWarpDistributedShm;\n",
+      "      } else {\n",
+      "        return Algo::kRadix11bits;\n",
+      "      }\n",
+      "    }\n",
+      "  } else {\n",
+      "    if (cols > 13776) {\n",
+      "      if (rows > 335) {\n",
+      "        if (k > 1) {\n",
+      "          if (rows > 546) {\n",
+      "            return Algo::kWarpDistributedShm;\n",
+      "          } else {\n",
+      "            if (k > 17) {\n",
+      "              return Algo::kWarpDistributedShm;\n",
+      "            } else {\n",
+      "              return Algo::kFaissBlockSelect;\n",
+      "            }\n",
+      "          }\n",
+      "        } else {\n",
+      "          return Algo::kFaissBlockSelect;\n",
+      "        }\n",
+      "      } else {\n",
+      "        if (k > 44) {\n",
+      "          if (cols > 1031051) {\n",
+      "            return Algo::kWarpDistributedShm;\n",
+      "          } else {\n",
+      "            if (rows > 22) {\n",
+      "              return Algo::kWarpDistributedShm;\n",
+      "            } else {\n",
+      "              return Algo::kRadix11bits;\n",
+      "            }\n",
+      "          }\n",
+      "        } else {\n",
+      "          return Algo::kWarpDistributedShm;\n",
+      "        }\n",
+      "      }\n",
+      "    } else {\n",
+      "      if (k > 1) {\n",
+      "        if (rows > 188) {\n",
+      "          return Algo::kWarpDistributedShm;\n",
+      "        } else {\n",
+      "          if (k > 72) {\n",
+      "            return Algo::kRadix11bits;\n",
+      "          } else {\n",
+      "            return Algo::kWarpDistributedShm;\n",
+      "          }\n",
+      "        }\n",
+      "      } else {\n",
+      "        return Algo::kFaissBlockSelect;\n",
+      "      }\n",
+      "    }\n",
+      "  }\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "def convert_model_to_code(model):\n",
+    "    classes = model.classes_\n",
+    "    tree = model.tree_\n",
+    "    feature_names = [\"k\", \"rows\", \"cols\", \"use_memory_pool\"]\n",
+    "    \n",
+    "    def _get_label(nodeid):\n",
+    "        \"\"\" returns the most frequent class name for the node \"\"\"\n",
+    "        return classes[np.argsort(tree.value[nodeid, 0])[-1]]\n",
+    "    \n",
+    "    def _is_leaf_node(nodeid):\n",
+    "        \"\"\" returns whether or not the node is a leaf node in the tree\"\"\"\n",
+    "        # negative values here indicate we're a leaf\n",
+    "        if tree.feature[nodeid] < 0:\n",
+    "            return True\n",
+    "        \n",
+    "        # some nodes have both branches with the same label, combine those\n",
+    "        left, right = tree.children_left[nodeid], tree.children_right[nodeid]    \n",
+    "        if (_is_leaf_node(left) and \n",
+    "            _is_leaf_node(right) and \n",
+    "            _get_label(left) == _get_label(right)):\n",
+    "            return True\n",
+    "    \n",
+    "        return False\n",
+    "    \n",
+    "    code = []\n",
+    "    def _convert_node(nodeid, indent):\n",
+    "        if _is_leaf_node(nodeid):\n",
+    "            # we're a leaf node, just output the label of the most frequent algorithm\n",
+    "            class_name = _get_label(nodeid)\n",
+    "            code.append(\" \" * indent + f\"return Algo::{class_name};\")\n",
+    "        else:             \n",
+    "            feature = feature_names[tree.feature[nodeid]]\n",
+    "            threshold = int(np.floor(tree.threshold[nodeid]))\n",
+    "            code.append(\" \" * indent + f\"if ({feature} > {threshold}) \" + \"{\")\n",
+    "            _convert_node(tree.children_right[nodeid], indent + 2)\n",
+    "            code.append(\" \" * indent + \"} else {\")\n",
+    "            _convert_node(tree.children_left[nodeid], indent + 2)\n",
+    "            code.append(\" \" * indent + \"}\")\n",
+    "    \n",
+    "    code.append(\"inline Algo choose_select_k_algorithm(size_t rows, size_t cols, int k)\")\n",
+    "    code.append(\"{\")\n",
+    "    _convert_node(0, indent=2)\n",
+    "    code.append(\"}\")\n",
+    "    return \"\\n\".join(code)\n",
+    "\n",
+    "code = convert_model_to_code(model)\n",
+    "print(code)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "412548a6-53ce-40c3-bdc2-4a69e000a7e1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "6750"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# also update the source code in raft/matrix/detail/select_k-inl.cuh\n",
+    "import pathlib\n",
+    "select_k_path = pathlib.Path.cwd() / \"..\" / \"..\" / \"..\" / \"include\" / \"raft\" / \"matrix\" / \"detail\" / \"select_k-inl.cuh\"\n",
+    "source_lines = select_k_path.resolve().read_text().split(\"\\n\")  # read_text closes the file for us\n",
+    "\n",
+    "# find the existing function (its signature line through the next line equal to the closing brace) and splice the generated code in\n",
+    "code_lines = code.split(\"\\n\")\n",
+    "first_line = source_lines.index(code_lines[0])\n",
+    "last_line =  source_lines.index(code_lines[-1], first_line)\n",
+    "new_source = source_lines[:first_line] + code_lines + source_lines[last_line+1:]\n",
+    "\n",
+    "select_k_path.resolve().write_text(\"\\n\".join(new_source))  # returns the number of characters written and closes the file"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/cpp/scripts/heuristics/select_k/generate_plots.ipynb b/cpp/scripts/heuristics/select_k/generate_plots.ipynb
new file mode 100644
index 0000000000..1ed1f432d3
--- /dev/null
+++ b/cpp/scripts/heuristics/select_k/generate_plots.ipynb
@@ -0,0 +1,352 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "0d0564e1-137c-4458-b0b4-d4aa01301942",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from collections import defaultdict\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "import numpy as np\n",
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt\n",
+    "sns.set_theme()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f91d6f1d-e198-46c8-9ac6-955995f058d1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>key_type</th>\n",
+       "      <th>index_type</th>\n",
+       "      <th>algo</th>\n",
+       "      <th>row</th>\n",
+       "      <th>col</th>\n",
+       "      <th>k</th>\n",
+       "      <th>use_index_input</th>\n",
+       "      <th>use_memory_pool</th>\n",
+       "      <th>time</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix8bits</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000024</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bitsExtraPass</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kWarpImmediate</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000009</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kWarpFiltered</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1024</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000010</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179936</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>7586</td>\n",
+       "      <td>162460</td>\n",
+       "      <td>8149</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.104029</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179937</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bitsExtraPass</td>\n",
+       "      <td>7586</td>\n",
+       "      <td>162460</td>\n",
+       "      <td>8149</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.104047</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179959</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix8bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000201</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179960</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bits</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000138</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>179961</th>\n",
+       "      <td>float</td>\n",
+       "      <td>int64_t</td>\n",
+       "      <td>kRadix11bitsExtraPass</td>\n",
+       "      <td>1075</td>\n",
+       "      <td>2042</td>\n",
+       "      <td>8175</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000138</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>22913 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       key_type index_type                   algo   row     col     k  \\\n",
+       "40        float    int64_t            kRadix8bits     1    1024     1   \n",
+       "41        float    int64_t           kRadix11bits     1    1024     1   \n",
+       "42        float    int64_t  kRadix11bitsExtraPass     1    1024     1   \n",
+       "43        float    int64_t         kWarpImmediate     1    1024     1   \n",
+       "44        float    int64_t          kWarpFiltered     1    1024     1   \n",
+       "...         ...        ...                    ...   ...     ...   ...   \n",
+       "179936    float    int64_t           kRadix11bits  7586  162460  8149   \n",
+       "179937    float    int64_t  kRadix11bitsExtraPass  7586  162460  8149   \n",
+       "179959    float    int64_t            kRadix8bits  1075    2042  8175   \n",
+       "179960    float    int64_t           kRadix11bits  1075    2042  8175   \n",
+       "179961    float    int64_t  kRadix11bitsExtraPass  1075    2042  8175   \n",
+       "\n",
+       "        use_index_input  use_memory_pool      time  \n",
+       "40                    0                1  0.000024  \n",
+       "41                    0                1  0.000013  \n",
+       "42                    0                1  0.000013  \n",
+       "43                    0                1  0.000009  \n",
+       "44                    0                1  0.000010  \n",
+       "...                 ...              ...       ...  \n",
+       "179936                0                1  0.104029  \n",
+       "179937                0                1  0.104047  \n",
+       "179959                0                1  0.000201  \n",
+       "179960                0                1  0.000138  \n",
+       "179961                0                1  0.000138  \n",
+       "\n",
+       "[22913 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from select_k_dataset import load_dataframe, get_dataset\n",
+    "df = load_dataframe(\"select_k_times.json\")\n",
+    "df = df[(df.use_memory_pool == True)]\n",
+    "df = df[(df.index_type == 'int64_t') & (df.key_type == 'float')]\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "e1b124af-1630-4139-a1e7-ed9ea96c384d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def generate_plot(df, x_axis=\"col\", title=\"\"):\n",
+    "    fig, ax = plt.subplots()\n",
+    "    for algo in sorted(set(df.algo)):\n",
+    "        current = df[(df.algo == algo) & (df.time < np.inf)]\n",
+    "        ax.plot(current[x_axis], current[\"time\"], label=algo)\n",
+    "    ax.set_xscale('log', base=2)\n",
+    "    ax.set_yscale('log', base=2)\n",
+    "    ax.set_xlabel(x_axis)\n",
+    "    ax.set_ylabel(\"time(s)\")\n",
+    "    ax.set_title(title)\n",
+    "    fig.set_dpi(200)\n",
+    "    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=4)\n",
+    "#    fig.legend()\n",
+    "    plt.show()\n",
+    "\n",
+    "def generate_k_plot(df, col, row):\n",
+    "    return generate_plot(df[(df.col == col) & (df.row == row)], \"k\", f\"#cols={col}, #rows={row}\")\n",
+    "\n",
+    "def generate_col_plot(df, row, k):\n",
+    "    return generate_plot(df[(df.row == row) & (df.k == k)], \"col\", f\"#rows={row}, k={k}\")\n",
+    "\n",
+    "def generate_row_plot(df, col, k):\n",
+    "    return generate_plot(df[(df.col == col) & (df.k == k)], \"row\", f\"#cols={col}, k={k}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "13742a23-09a2-4ca1-aeb9-8e3914c7fc4d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAABwYAAAQVCAYAAAC4+q7tAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAB7CAAAewgFu0HU+AAEAAElEQVR4nOzdd3hU17n+/Xtm1HtBlQ6m9yY6mOaKjcEdDNhgXJL45CROnJPkvHbsnPzsOHFwYscVTLOxjXHHHWwjehW9NyGEUO9t2n7/8EEHsQUIJM1I6Pu5rlyZmXvPXo9GG0uaZ9ZaFsMwDAEAAAAAAAAAAAC4qlm9XQAAAAAAAAAAAACAhkdjEAAAAAAAAAAAAGgGaAwCAAAAAAAAAAAAzQCNQQAAAAAAAAAAAKAZoDEIAAAAAAAAAAAANAM0BgEAAAAAAAAAAIBmgMYgAAAAAAAAAAAA0AzQGAQAAAAAAAAAAACaARqDAAAAAAAAAAAAQDNAYxAAAAAAAAAAAABoBmgMAgAAAAAAAAAAAM0AjUEAAAAAAAAAAACgGaAxCAAAAAAAAAAAADQDNAYBAAAAAAAAAACAZoDGIAAAAAAAAAAAANAM0BgEAAAAAAAAAAAAmgEagwAAAAAAAAAAAEAzQGMQAAAAAAAAAAAAaAZoDAIAAAAAAAAAAADNAI1BAAAAAECzN3bsWHXp0kVdunTRqVOnvF0OAAAAADQIH28XAAAAAODKFRcXKykpSW63W/fee6/+9Kc/VcufeOIJffrppwoICNC2bdvk48OfAFeLU6dOaf369dq8ebMOHTqkjIwMlZWVKTg4WHFxcerXr58mTpyopKSkWp+zS5cul1WDzWbTvn37Lus5drtd33//vb799lvt379fWVlZcrlcatGiheLj49W/f38NHjxYgwYNkp+f32Wdu7mqqKjQzp07tWPHDh06dEjHjx9XRkaGSkpKZBiGQkND1bZtW/Xt21e33nqrunfvXqvz2u12bd26VRs2bNCePXt07Ngx5efnS5IiIiLUuXNnDRs2TFOmTFFEREQDfoUAAAAA6ovFMAzD20UAAAAAuDJr167V7NmzJUnPP/+8Jk2aVC0fP3680tLSNGjQIL399tveKBH1bN++fXrqqae0a9euWh2flJSkv/71r0pMTLzksQ3dGNy4caOeeuopnThx4pLHLl++XL169bqseupi7NixSk9PlyStWrVKrVq18tjYdTV//nw9//zztT7+pptu0lNPPXXRZt7zzz+vDz74QEVFRZc8X1BQkH77299q6tSpta4BAAAAgHfwcWEAAACgCUtJSam63b9//2pZdna20tLSJEn9+vXzaF1oOMePHzc1Bdu1a6fOnTsrMjJSRUVFSklJ0ZkzZyRJmzdv1t13362lS5eqdevWtR5n2rRplzzGaq397hQff/yx/vCHP8jtdkuSfH191bdvXyUkJCgwMFAFBQU6cuSIjh07Jj6/euUCAwPVoUMHtWnTRmFhYXI6ncrMzNSOHTtUUlIiSfryyy915MgRvfvuuwoJCanxPN9//321pmBQUJB69+6tuLg4+fr6KjU1VTt27JDD4VBZWZmefvppZWRk6PHHH/fI1wkAAADgytAYBAAAAJqwHTt2SJJiYmJMTZ/t27dX3aYxePVp27at7rjjDk2aNElxcXHVMrfbrY8++kj/8z//o/LycmVlZek3v/mN3nvvPVksllqd/8knn6y3WleuXFnVFPTx8dGcOXP04IMP1tiUysnJ0ddff83SlJehbdu2+tWvfqURI0aoa9euNS4ZXFlZqUWLFmnu3Llyu906dOiQ/vGPf1z0++zj46PrrrtOd9xxhwYPHmw67+nTp/WHP/xBGzZskCS98cYbGjhwoEaPHl2/XyAAAACAesNSogAAAEAT5Xa7lZSUpOLiYl1//fX617/+VS1/9tlntXDhQkk/LeEYGRnphSpR3zZv3qxTp05p0qRJstlsFz3
2u+++0y9+8Yuq+/PmzdPIkSMvePy5S4kePHiw7sVKKigo0M0336ycnBxZrVa99NJLGj9+fL2cuz415aVEL8fLL7+sl156SZIUHBysDRs2yN/f33TcSy+9pNtuu+2Ss0ztdrumTp2q3bt3S5L69u2r999/v/4LBwAAAFAvar/uCwAAAIBG5ciRIyouLpZU84zAszMG27VrR1PwKpKUlKQpU6ZcsikoSRMmTFDv3r2r7q9evbohS6vRK6+8opycHEnS9OnTG2VTsDm54447qm6XlpYqNTW1xuMee+yxWi096+fnp8cee6zq/s6dO5Wfn1/3QgEAAAA0CJYSBQAAAJqos8uISub9BSsqKrR///4as4upacbYgQMHtHz5cm3cuFFZWVkqLCzUuHHj9Morr9RY02effaZNmzYpKytLFRUVioyMVKdOnTRmzBhNmTJFQUFBNY7tdDo1aNAglZWVycfHR5s3b1ZwcHCNx06fPl2bN2+WJNlsNm3atEmhoaE1Hnv//fdXLXW4cOFCDR061HTMrl279MknnyglJUWnTp1SaWmpfHx8FBYWpsTERHXr1k2DBw/Wtddee8H6G6v+/ftX7Ul46tQpj45dWVmpTz75RNJP36fZs2d7dPyG8sYbb+iFF16Q9NPX9T//8z+aMmWKl6uqnaioqGr3S0tL63zOAQMGVN02DEOnT5/mwwgAAABAI0VjEAAAAGjk1q5dq3Xr1pke37RpU9Xtzz//XF9//XXV/by8PDkcDklSWlqa/vrXv1Z77vDhwzVixIhLjv3SSy/p1VdflcvluuhxZWVl+uMf/6gvv/zSlGVmZiozM1Nr167Va6+9pr/85S817kHm4+Oj/v37a+3atXI6ndq2bZtGjRplOq6ysrJaU9Tlcmnr1q0aM2aM6Vi73a6UlBRJkq+vr2lmpdPp1DPPPFPj0ocul0vZ2dnKzs7Wzp079d577+mRRx7Rr371q4u+Fo3NuXsKut1uj479ww8/qLCwUJLUp08f016ITY1hGHruueeqluj19/fX3LlzNW7cOO8WdhmOHDlS7X7Lli3rfYxL/fcCAAAAgPfQGAQAAAAauZSUFL311lsXPWbJkiUXzLZs2aItW7ZUeywoKOiSjcF58+bp5ZdfliS1adNGvXv3VkBAgNLT0+Xj839/SpSXl2vmzJlVs9IkKTY2VgMHDlRQUJBOnjypbdu2VTXafvazn+mFF17QDTfcYBpz8ODBWrt2raSfGp81NQZTUlJkt9urPbZp06YaG4O7du1SRUWFJFXVf67nn3++WlMwLi5OvXv3VlRUlNxutwoKCnTkyBEdP378oq9VY3bo0KGq2/Hx8bV+3pYtW7Rr1y7l5OTIZrMpMjJSXbt2Vb9+/Wo9a/LscraS1KNHD0lSRkaG3nvvPX3//fdVMxhjY2M1YMAATZ48WYMGDap1jZ7kdDr1hz/8QZ9++qkkKTQ0VK+++mqjrbcmdru9aqaj9NMSxLGxsXU+77nXmCQlJCTU+ZwAAAAAGgaNQQAAAAA1mjt3rkJDQ/Xcc8+Z9oU7tzH317/+taopaLPZ9Lvf/U7Tp0+X1fp/W5qfOHFCv/71r7V37145nU798Y9/VM+ePdWqVatq501KSqq6fe6MyHOdXUJU+mlZxLy8vAsee+7j555bkvLz8/XOO+9U1f2Xv/xFt912W7UZdmdlZWXpm2++MTUWz9q5c2dVw6i+jB49usaZlZfj9OnT2rhxY9X9YcOG1fq59913X42PBwYG6vbbb9fPfvYzRUdHX/Qcu3fvrrqdmJior776Sv/93/+tkpKSasedOHFCJ06c0Icffqjrr79ezz33XKNasrW8vFy//OUvq/ZojImJ0bx589S1a9cajy8oKNC//vWveq2hbdu2mjlz5mU/z263Kzs7W1u3btWCBQuqlhgODg7W//f//X/1UtvHH39cdbtTp06KiYmpl/MCAAAAqH80BgEAAIBG7rHHHtNjjz1W7bGPPvpIv//97yX
9NLNv5MiRVZnD4dCgQYNUXl6um266SXPnzr2icd1u9wVnRPn5+UmSTp48WW3G3R//+EdNmzbNdHy7du20YMECTZ48Wenp6SopKdG///1vPfvss9WO69mzp4KCglRWVqZ9+/appKREISEh1Y452xhMTEzU2LFj9fbbb+vAgQMqKipSWFhYjcdKP81GPNeOHTvkdDolSTfddJMmT558wdciNjZW06dPv2B+9OjRqiZjfYmMjKxzY/C5556rWtbx7OtVV+Xl5Xr77bf1zTff6OWXX1bfvn0veOyZM2eqbu/YsUMvvPCCnE6nfH19NXDgQLVq1UrFxcXasmWLcnNzJUnffPONcnJytGjRIvn6+ta53roqLCzUww8/XLUkbevWrfXWW2+pTZs2F3xOSUlJvV8PSUlJtW4Mdu/e/aLLebZr104vvfSSOnfuXOe69u/frw8//LDq/tSpU+t8TgAAAAANx3rpQwAAAAA0Nmdnwvn6+mrAgAHVst27d6u8vFySeZbc5bj++usvuUzismXLqvat69at20WbAuHh4frNb35TdX/FihUqLi6udoyPj0/V13N278BzVVRUaOfOnZJ++trONvvcbne1JqD000yps3sR+vn5mfYXPHfWWlRU1EW/zqbo448/1jfffFN1/9e//nVVQ/dC/Pz8dOONN+q5557TihUrtG3bNu3Zs0fr1q3T66+/ruuvv75qRmV2drYefvjhiy6zWlRUVHX7m2++kdPpVJ8+ffTVV19p4cKF+p//+R/985//1I8//qiHHnqo6tht27bplVdeudIvvd5kZmZq2rRpVU3BLl266N13371oU7Axs9lseuSRR/TFF1/US1OwrKxMTzzxRFUT8pprrtGdd95Z5/MCAAAAaDjMGAQAAACaoLN7Bp6dYVdTJtWtMXjzzTdf8phzl6mcPHlyjctwnmvChAmKiIhQQUGB7Ha7UlJSTPsIJiUlac2aNZJ+aoBee+21Vdm5+wsOHjxYgwYNksVikWEY2rRpU7UlT3fu3HnR/QXP3Qftu+++08MPP3zJpTEvZMqUKZoyZcoVPbch7N69W0899VTV/YkTJ+qWW2655POSk5MVGRlperxFixa69tprde211+qHH37QL3/5S1VWVqqgoEBPP/20Fi5cWOP5zjaoz0pISND8+fMVGhpa7XE/Pz89/vjjKi4u1rvvvitJWrRokWbNmmU61lOOHTumBx98UOnp6ZKkgQMH6rXXXqtVPa1atdLBgwcbusQLmjp1alXDvry8XBkZGdq1a5dKS0v12muvVS3pWtMenrVlGIZ+//vfV+0v6OfnpxdeeKFRzPIEAAAAcGHMGAQAAACamLS0tKpmxfnLY0r/t3xmixYt1LFjxysep0ePHhfNDcPQgQMHqu6fPyOvJr6+vurVq1fV/X379pmOOfdrOn/vwPOXBo2MjKya+XT+sRfbX1CS+vTpU9UcPH36tG6++WY9//zz2rp1a7U9FJuatLQ0Pfroo6qsrJT00yy3p59+ulbPrakpeL4xY8bov//7v6vub9iwQXv27KnxWH9//2r3H3300Ys21n75y19WzWosLS1VcnJybcqud3v27NHUqVOr/p2NGTOmxoZmY/Xf//3fevLJJ/Xkk0/q2Wef1cKFC7V27Vr95je/kZ+fn1JTU/Xwww9X2xvwcv3973/X119/XXX/6aefvuCeiwAAAAAaDxqDAAAAQBNzbnNsyJAh1TKXy1W17OHAgQPrNM6lltcsLi6Ww+Gout+yZctanffc4/Lz8015jx49FBwcLOmn/cvOXY7y7NfesmXLqvOcbfodOnSo2vnOfZ1qagz6+vrq+eefr5pxmZ+fr/nz52vatGkaOHCgpk6dqrlz52rbtm0yDKNWX5u3ZWVladasWcrOzpb003548+bNM+3TWFd33HGHEhMTq+5fqIF3/mzWc2d01iQyMrLa8rVnr2VPe/zxx6uupVtuuUUvv/yyacZpUxMUFKQ5c+boH//4h6S
flt996qmnlJaWdtnnevPNNzVv3ryq+7/5zW8a1YxZAAAAABfGUqIAAABAI7Vz5059+umnpse3bdtWdfvzzz/Xd999V3W/pKREpaWlkqSMjAw988wz1Z7bp08fTZo0qVbjX6oRUlZWVu1+YGBgrc57brPobK3nOrvPYHJystxut7Zs2aJx48ZV21/w3FmFgwcP1pIlS2QYhrZs2aLrrrvOtL9g//79a6wlKSlJn332mV5++WV9/fXXVUuPVlZWatu2bdq2bZtee+01tWvXTr/97W8v2djypvz8fM2aNUsnT56UJMXExGjBggWKjY2t97GsVquGDBmijz76SJJ09OjRGo+LiIhQTk6OpJ+afrVZqrVDhw5at26dpJ/2+PMGHx8fOZ1OSdKpU6dUUVFR781Vb5kwYYKGDh2qDRs2qLKyUkuXLtXvfve7Wj//vffe09///veq+w899JDmzJnTEKUCAAAAaAA0BgEAAIBG6ujRo3rnnXcuesyHH354wWznzp1VjbSzysrKat0YvJTzZ4OVl5ebHqvJuQ3FszMDz5eUlFQ1C23Tpk0aN26cUlJSqmYontsYPH+fweuuu047duyoWkqzT58+piUtz9W6dWv99a9/1VNPPVXVDNy+fXu1PQpPnDihn//85/qv//ovPfDAA6ZzXKiJWxejR4/W6NGja3VsSUmJZs+ercOHD0v6qQm3cOFCtW7dul5rOldMTEzV7YKCghqP6dChg44cOSLJfL1cyLnXRE2NY0+YO3eu/vCHPyg/P18pKSl68MEHL2vmZUFBgf71r3/Va01t27bVzJkz6+Vcw4YN04YNGyRJ27dvr/XzPv30U/3pT3+qun/vvffq8ccfr5eaAAAAAHgGjUEAAAAAVyQ0NFS+vr5VzbrTp0/XakbY2X3bpAvvaVfTPoPn7hl47hKqERER6tKliw4cOFB1zKWWEa1JUFCQRo4cqZEjR0qSKioq9OOPP+rf//63Dh06JEl64YUXdNNNNykuLq7ac2vTxL1ckZGRtWoMlpWVac6cOdq7d6+kn74v8+bN0zXXXFOv9dQ07lkXmi3aqVMnffvtt6bjL+bcZqC3Zul17txZixYt0syZM6+oOVhSUlLv10NSUlK9NQbDw8Orbl+oqXu+b775Rr///e+rltWdNGmSnnrqqXqpBwAAAIDnsMcgAAAA0EhNmTJFBw8erPa/uXPnVuVvvfVWtWz//v0KDQ2VJF133XWm5x48eFDPPfdcvdVnsVjUtWvXqvu12Q/O6XRq9+7dVfe7d+9e43Hn7jN48OBBFRQUVDX72rRpo/j4+GrHn20kHjlyRHl5edWaiLVtDJ4vICBAN9xwg5YsWaIWLVpIkhwOh9asWXNF52sIlZWVevTRR6tmfQUGBur1119Xz549G3zs/fv3V92+0HKlQ4cOrbqdn5+vvLy8S5732LFjVbcTEhLqUGHddOnSRQsXLqxqXqekpGjOnDkqKSnxWk315ewelFL1JuGF/Pjjj3r88cflcrkk/fTfl2effVYWi6XBagQAAADQMJgxCAAAADQhZ5tjPj4+6tevX7Xs4MGDKi4uliQNHDjQI/UMGTKkqtH3ySefaPr06RdtFqxcubJqhpK/v7/pazjLZrNV7TNoGIaSk5O1a9cuSdVnE56VlJSkRYsWVR17dglVPz+/C45RWxEREerfv3/VzLfc3FzTMVOmTNGUKVPqNM7lcjgceuyxx7Rx40ZJP32tr7zyigYMGNDgYx89erRaI/hCzdcBAwYoOjq66jVbuXKl7rrrrguet6CgQFu3bq26P2jQoHqq+Mp07dpVCxcu1P3336/8/Hxt375dc+bM0bx58y64DK4ktWrVSgcPHvRgpZfnhx9+qLrdsWPHix67YcMG/cd//EfVzOBRo0bphRdekM1ma9AaAQAAADQMZgwCAAAATcjZpkm3bt1Me7Z5o6Fy1113yWr96c+KvXv36v3337/gsUVFRfrb3/5Wdf/mm2+umuFYk3M
bgG+++WaN+wueNWjQoKo65s+fX7W/YN++fS+4v2B+fv4Fxz5fRkZG1e2oqKhaP6+huFwuPf7441q9erWknxrFL774ooYNG3bF56ztfn7l5eX6/e9/XzV7LDIysmr51fNZrVZNmzat6v5rr7120Rl3L730UtX3LiYm5oLn9aSzzcGIiAhJP+3J9+CDD3pt/8PzlZWVVb1mtbF06VLt2bOn6v511113wWO3b9+un/3sZ1XnT0pK0ksvvSQ/P78rLxgAAACAV9EYBAAAAJqI/Px8HTlyRFLNMwLPNgZDQ0OrLfHZkNq0aaO777676v6f//xnvfPOO3K73dWOS01N1axZs3Tq1ClJP+0d9/Of//yi5z53FtrZPf7Of/ys8PDwqq/5Usee9fbbb2vSpElaunRptaUVz1VaWqq5c+dWzYq02WwaMWLERetuaIZh6I9//KO++eYbST81355//nmNGzeuTucdO3as/vnPf+ro0aMXPGbbtm26++67q2ZkStIvf/nLi86ee+CBB6r2ZExPT9eDDz5YbZ9JSbLb7XrxxRf19ttvVz3285///IJN3U2bNqlLly5V/zt36diG0LVrVy1atKhRNgdTU1M1YcIEzZs3r1oD+3zZ2dn6f//v/+mZZ56pemzgwIEaM2ZMjcfv27dPDz30UNXekH369NFrr72mgICA+v0CAAAAAHgUS4kCAAAATcTWrVtlGIakmhuD27ZtkyT169evavacJ/zud7/Tnj17tHv3bjmdTj3zzDN64403NGDAAAUFBenkyZPaunVr1QwzHx8f/eUvf1GrVq0uet4ePXooJCSk2gyzdu3aVTWZzjd48GDt27ev2mOX2l/wwIEDevrpp/XMM8+oTZs26tSpkyIjI+V0OpWdna3t27dXNUYkac6cOV7d9076acbXxx9/XHW/TZs22rZtW9X3/1KefPLJGh8vKCjQK6+8oldeeUWxsbHq0qWLWrRoIT8/PxUWFmrv3r1KS0ur9pxp06bp3nvvveh4QUFBevnllzVz5kyVlZUpJSVF119/vQYNGqRWrVqpuLhYW7ZsUU5OTtVzbr311kue19PONgdnzpypgoKCqmVF33zzzYs2Rj0hMzNTf/vb3/S3v/1NLVu2VOfOnRUZGSlfX1+Vlpbq2LFjOnjwYNW/QUlq3759tT1Lzzd79uyqpYklqXXr1nrhhRdqVc/o0aM1evToK/+CAAAAADQYGoMAAABAE7Fly5aq2/3796+WpaamVs168/S+bIGBgVq0aJH++Mc/6quvvpIknTlzRl988YXp2JiYGP3lL3+pVdPg7D6DZ5fLlGpeRvSspKQkLViwoOq+n5+f+vbte8Hjz23mGIah1NRUpaam1nisr6+vHnnkEf3iF7+4ZN0NLS8vr9r9EydO6MSJE7V+/oUag+fKyspSVlbWBfPw8HD99re/1Z133lmrMXv37q233npLTzzxhE6ePCmHw6H169ebjvPx8dEDDzygX//61xc939kG+Vme2u/u/Obgtm3bvN4c9PHxkdVqrZqlm56ebpqReS6r1ao77rhDv/nNbxQeHn7B486/zlasWFHrmiIjI2kMAgAAAI0UjUEAAACgiTi7VGjHjh1N+9ydu79gTbMJG1pwcLBefPFFzZw5U59++qk2b96srKwsVVRUKDIyUp07d9a1116r22+/3bQ34sUMHjy41o3BQYMGyWazVc2Kutj+gpI0a9YsXXfddVq/fr1SUlJ08OBBpaenq7S0VBaLRWFhYerQoYOGDBmi2267TS1btqx13U3RN998ox07dlS9Fnl5ecrPz1dZWZmCgoIUFRWlnj17aujQobr55psVGBh4Wefv16+fPvvsM61YsUJfffWVjh49qtzcXAUFBSkxMVFDhw7VXXfdpfbt21/yXAcPHqy63aZNm4s2gOtbY2sOdurUSevWrdO6deuqvndpaWkqKCiQ0+lUcHCwIiIi1LlzZ/Xv318TJ0684KxbAAAAAFc/i3H+Ry0
BAAAAAGjEHn30UX3//feSpOeff16TJk3yckUAAAAA0DR4buMRAAAAAADqyOVyVc2Q7dy5s2655RYvVwQAAAAATQeNQQAAAABAk7F3714VFRVJkn75y1/KauXPWgAAAACoLf6CAgAAAAA0GRs3bpQk9erVS+PHj/dyNQAAAADQtLDHIAAAAAAAAAAAANAMMGMQAAAAAAAAAAAAaAZoDAIAAAAAAAAAAADNgI+3CwAA1N7777+vDRs26ODBg8rNzVVpaanCw8PVq1cv3XPPPRozZoy3SwQAAAAAAAAANFLsMQgATcgNN9ygtLQ0de7cWXFxcQoICFBaWpr27NkjSZo1a5Z+97vfeblKAAAAAAAAAEBjRGMQAJqQlJQUde7cWcHBwdUe37p1q+bMmaOysjItW7ZMffr08VKFAAAAAAAAAIDGij0GAaAJ6devn6kpKEkDBw7UjTfeKEnasGGDp8sCAAAAAAAAADQBNAYB4Crh4/PTtrF+fn5ergQAAAAAAAAA0Bj5eLsAAGjMSkpKtG7dOm3atEn79u3TiRMnVFxcLH9/f8XGxqp3796aOHGiRo4cKYvF4rU69+/fr6+++ko2m00jR470Wh0AAAAAAAAAgMaLPQYB4AIWLFiguXPnqrKy8pLHDhw4UH/729+UmJjogcqkDz/8UFu2bJHD4VB6erp27NghHx8fPfnkk7rrrrs8UgMAAAAAAAAAoGlhxiAAXMDx48ermoJxcXEaNmyYevTooejoaFVWVmrHjh367LPPVFZWpq1bt2r69OlatmyZoqOjG7y27du36+OPP666HxgYqD/84Q+6/fbbG3xsAAAAAAAAAEDTxIxBALiAp556SqdOndKsWbM0dOhQWa3mbVnT09M1e/ZsHT9+XJI0ZcoUPfvss6bjnnjiCe3ateuyxp8wYYIef/zxix5TVlam1NRULVmyRB9++KFGjhypl19+WQEBAZc1FgAAAAAAAADg6kdjEAAuoKCgQBEREZc87sCBA5o0aZKkn2bubdiwQYGBgdWOmT59ujZv3nxZ40+ePFnPPfdcrY//4x//qOXLl+s///M/9eijj17WWAAAAAAAAACAqx+NQQCoBzfccEPVrMFPP/1UXbt29XgNW7Zs0X333adevXpp+fLlHh8fAAAAAAAAANC4sccgANSDkJCQqttn9yX0tKioKElSXl6eV8aXpKKiCrlcbq+Nj/oXFhYgm80ql8utoqIKb5eDqwjXFhoC1xUaAtcVGgrXFhoC19XVy2azKiyMbUMAAHVHYxAA6shut+vEiRNV9xMTE71Sx6ZNmyRJbdu29cr4kuRyueV0urw2PhoW31s0FK4tNASuKzQEris0FK4tNASuKwAAUBOrtwsAgKZuxYoVKi4uliT16NFDMTExDTLOnj179N1338npdJqyH374QS+++KIk6c4772yQ8QEAAAAAAAAATRszBgGgDvLy8vT3v/+96v6jjz7aYGOdOXNGv/jFLxQWFqYePXooOjpaxcXFOn78uE6ePClJmjVrlm666aYGq+FSWNbk6mO1Wqr+Pyoq2MvV4GrCtYWGwHWFhsB1hYbCtYWGwHUFAAAuxWIYhuHtIgCgKbLb7XrggQe0detWSdL48eP173//u8HGy8zM1LJly7R582adPHlSeXl5slqtio2NVb9+/XTXXXdp4MCBDTY+AAAAAAAAAKBpozEIAFfA7XbriSee0Oeffy5JatOmjZYvX67w8HAvV+ZdLpfb2yWgnlmtFlksFhmGIbebXxlQf7i20BC4rtAQuK7QULi20BC4rq5uNhu7QgEA6o6lRAHgMhmGoaeeeqqqKZiYmKgFCxY0+6agJBUVVbDB/VUmKipYNptFbrehvLxSb5eDqwjXFhoC1xUaAtcVGgrXFhoC19XVy8fHpsjIIG+XAQC4CvAxEwC4DIZh6E9/+pOWLVsmSYqPj9eiRYvUqlUrL1cGAAAAAAAAAMDF0RgEgFoyDENPP/203nvvPUlSXFy
cFi9erDZt2ni5MgAAAAAAAAAALo3GIADUwtmm4LvvvitJio2N1eLFi9W2bVsvVwYAAAAAAAAAQO3QGASASzi/KRgTE6PFixerXbt23i0MAAAAAAAAAIDLQGMQAC7hmWeeMTUF27dv7+WqAAAAAAAAAAC4PDQGAeAi/vznP2vp0qWS/q8p2KFDBy9XBQAAAAAAAADA5fPxdgEA0FjNnTtXb7/9tiTJYrFoxowZOnbsmI4dO3bR53Xv3l2JiYmeKBEAAAAAAAAAgFqjMQgAF7B9+/aq24Zh6IUXXqjV85599llNmTKlocoCAAAAAAAAAOCKsJQoAAAAAAAAAAAA0AwwYxAALmDJkiXeLgEAAAAAAAAAgHrDjEEAAAAAAAAAAACgGaAxCAAAAAAAAAAAADQDNAYBAAAAAAAAAACAZoDGIAAAAAAAAAAAANAM0BgEAAAAAAAAAAAAmgEagwAAAAAAAAAAAEAzQGMQAAAAAAAAAAAAaAZoDAIAAAAAAAAAAADNAI1BAAAAAAAAAAAAoBmgMQgAAAAAAAAAAAA0AzQGAQAAAAAAAAAAgGaAxiAAAAAAAAAAAADQDPh4uwAAAAAAAAAA8BR3RbmKt22Vu6xMYUOGyRYa6u2SAADwGBqDAAAAAAAAAJqF0j27lbl4oZx5uZKk4s2b1Pr3/y2LlYXVAADNA41BAAAAAAAAAFc1V2mpspe9p6J1a6o9XnH8mFylJfIJDfNSZQAAeBaNQQAAAAAAAABXrZIdKcpcskiuwgJT5t+6tWxBwZ4vCgAAL6ExCAAAAAAAAOCq4youVta776h488Ya88Cu3RQ/a44sNpuHKwMAwHtoDAIAAAAAAAC4ahiGoZKtW5S1dIlcxcWm3BoQoBZ33qPwUaNlsVi8UCEAAN5DYxAAAAAAAADAVcFZUKCsd5aoJGVbjXlQz96KmzFTvlHRHq4MAIDGgcYgAAAAAAAAgCbNMAwVb1ivrPeWyl1WasqtQUGKvWeaQocOY5YgAKBZozEIAAAAAAAAoMly5OUqc/Eile3ZVWMe0m+AYqdNl09EhGcLAwCgEaIxCAAAAAAAAKDJMdxuFa5ZrZwP3pe7osKU20JDFTt1ukIGDmKWIAAA/4vGIAAAAAAAAIAmxZ6dpcxFC1R+YH+NeWjSEMXcO1U+oWEergwAgMaNxiAAAAAAAACAJsFwu1Xw/UrlfLRcht1uym3hEYq7b4ZC+vX3QnUAADR+NAYBAAAAAAAANHr2jNM6s/AtVRw9UmMeNmKkYu68R7bgYA9XBgBA00FjEAAAAAAAAECjZbhcyv/2a+V++rEMp9OU+0RFK27mAwru0dML1QEA0LTQGAQAAAAAAADQKFWeStOZBfNVmXqixjx8zFjF3H6nrAGBni0MAIAmisYgAAAAAAAAgEbFcDqV9+UK5X7xueRymXLfmFjF3T9LQV26eqE6AACaLhqDAAAAAAAAABqNihMndGbBPNnTT5lDi0WR469T9G1TZPX393xxAAA0cTQGAQAAAAAAAHid22FX7qefKP/bryW325T7JSQq7v5ZCux4jReqAwDg6kBjEAAAAAAAAIBXlR85rDML58tx5ow5tFoVdcNNirrlVll9/TxfHAAAVxEagwAAAAAAAAC8wl1ZqZyPl6tg1UrJMEy5f+vWirt/tgLatvN8cQAAXIVoDAIAAAAAAADwuLID+5W56C05srPNoc2m6FsmKeqGm2Tx4S1MAADqCz9VAQAAAAAAAHiMq7xcOcvfV+HqH2vM/du1V/wDs+XfspVnCwMAoBmgMQgAAAAAAADAI0p371Lm4oVy5ueZMouvr6Jvm6LI8dfJYrN5oToAAK5+NAYBAAAAAAAANChXSYmyl72rovXraswDO3VW3MxZ8ouP93BlAAA0LzQGAQAAAAAAADSY4u3blPXOYrkKC02Zxd9fLW6/UxHXjpXFavVCdQAANC80BgEAAAAAAADUO2dRkbKWvq2SrZtrzIO6dVfcjAfkGxPj4co
AAGi+aAwCAAAAAAAAqDeGYah4yyZlL31HrpJiU24NDFTMnfcobOQoWSwWL1QIAEDzRWMQAAAAAAAAQL1wFuQr8+3FKt2RUmMe3LuPYu+bKd+oKA9XBgAAJBqDAAAAAAAAAOrIMAwVrVur7PeXyl1ebsqtwcGKvXeaQgcPZZYgAABeRGMQAAAAAAAAwBVz5OYoc/FCle3dU2MeMmCgYqdOl094uIcrAwAA56MxCAAAAAAAAOCyGW63Clf/qOzly2RUVphyW2iYYqdNV+jAQV6oDgAA1ITGIAAAAAAAAIDLYs/MVOait1R+6GCNeeiQoYq9Z5psISEergwAAFwMjUEAAAAAAAAAtWK43SpY+Z1yPvlQht1uym0REYqbfr9C+vT1fHEAAOCSaAwCAAAAAAAAuKTK06eVuXC+Ko4drTEPGzlKMXfeLVtQsIcrAwAAtUVjEAAAAAAAAMAFGU6n8r/9WrmffSLD6TTlPtHRipvxgIJ79PRCdQAA4HLQGAQAAAAAAABQo8q0kzqzYL4qT6bWmEeMHacWU+6UNSDAw5UBAIArQWMQAAAAAAAAQDVuh0N5X3yuvK++kFwuU+4bG6e4+2cpqHMXL1QHAACuFI1BAAAAAAAAAFXKjx1T5sL5sp9ON4cWiyKvu17Rt06W1d/f88UBAIA6oTEIAAAAAAAAQG67Xbmffqz8b7+WDMOU+yW2VNz9sxXYoYMXqgMAAPWBxiAAAAAAAADQzJUfPqQzC+fLkZlpDm02Rd14s6JuvkVWX1/PFwcAAOoNjUEAAAAAAACgmXJXVCjno+Uq+GFVjbME/du0Vdz9sxTQpq0XqgMAAPWNxiAAAAAAAADQDJXt36czi96SMyfHlFl8fBR1yyRFXX+jLD68hQgAwNWCn+oAAAAAAABAM+IqK1PO8vdVmLy6xjygQwfF3T9b/oktPVwZAABoaDQGAQAAAAAAgGaiZNcOZS1ZJGd+vimz+PqqxeTbFTH+OlmsVi9U5xlZWWe0Z88O2e129e7dX4mJrbxdEgAAHkNjEAAAAAAAALjKuUpKlPX+UhVvWF9jHti5i+JmPiC/uHgPV+Y5hmFo796dSknZIuN/91NMTl6pO+64Tz4slwoAaCb4iQcAAAAAAABcxYq3bVHW20vkKi4yZRZ/f8XccZfCR4+5qmcJVlRUaN26H5WefrLa43a7XU6nk8YgAKDZ4CceAAAAAAAAcBVyFhYqa+kSlWzbWmMe1L2H4mbcL98WMR6uzLOyszO1evVKlZWVmrLOnbspICDAC1UBAOAdNAYBAAAAAACAq4hhGCratEFZ774jd0mJKbcGBirm7qkKGz5CFovFCxV6hmEY2rdvt7Zv31S1dOi5+vQZoF69+nmhMgAAvIfGIAAAAAAAAHCVqMzN1ZFXXlfB1m015sF9+ipu+kz5RER6uDLPqqys1Lp1P+rUqVRTFhAQqJEjxyohoaUXKgMAwLtoDAIAAAAAAABNnGEYylq5SqkLF8tVVmbKrSEhir33PoUmDb6qZwlKUk5OllavXqnSUvNsybi4BI0cOU5BQUFeqAwAAO+jMQgAAAAAAAA0Yc6iImUumKfS3btqzEMGJil26n3yCQvzcGWeZRiGDhzYq23bNsrtdpvyXr36qU+fAbJarV6oDgCAxoHGIAAAAAAAANBEle7dozPz35CrqMiU2cLCFDtthkIHDPRCZZ5lt1dq/frVOnnyhCnz9w/QyJFjlZjYyvOFAQDQyNAYBAAAAAAAAJoYw+lUzsfLlf/N1zXmYUOHK+bue2ULCfFwZZ6Xm5uj1au/U0lJsSmLjY3XqFHjFBQUXO3xUnuJ3JJC/a7+1wcAgHPRGAQAAAAAAACaEHvmGWW88ZoqU0+YMt/ICHX42aMy2nfxfGEeZhiGDh7cp61bN9S4dGjPnn3Vt+/AakuHGoahL1IWaWX+Pjks0oTQTrpt0BxPlg0AgFfRGAQAAAAAAACaAMMwVLR+nbKWLpFRWWnKI/r3U6dfPiZ
baKjy8kq9UKHn2O12bdiQrNTUY6bMz89fI0aMUatWbao/x2nX4g3/VIojW7L89Nh3xYd1fXmRAgOv7v0XAQA4i8YgAAAAAAAA0Mi5ysqU9fZiFW/eaMosPj5qccdd6nDnbfLxscnlMs+eu5rk5eVo9epVKi4uNGUxMXEaNWqcgoOrLxFaVF6g1zf8QydUUe1xf7dbcjsbtF4AABoTGoMAAAAAAABAI1Z+9IjOvPm6HDnZpswvPkHxDz2igDZtZbFYvFCd5xiGocOHD2jz5vVyu12mvHv33urfP6na0qGSlJGfqle2vao8a/WGqcUwdEdIFwUGRzVo3QAANCY0BgEAAAAAAIBGyHC7lffVF8r99GOphj30wkaOUuw902T19/dCdZ7lcDi0ceMaHT9+xJT5+flp+PBr1bp1O1N24NQ2zTvwvsqr9wrl73ZrZmR/9e5/bwNVDABA40RjEAAAAAAAAGhkHPn5OjP/DZUf2G/KrEFBipvxgEIHDvJCZZ6Xn5+n1au/U1GReenQFi1iNGrUeIWEhJqydQe/0HunfpTbWn0mZbjTrYfb3ai2ncc1WM0AADRWNAYBAAAAAACARqRkR4rOLJgnd2mpKQvs1FnxDz4s3+hoL1TmWYZh6MiRg9q8eZ1cLvPSod269VT//oNls9mqPe423Pp0+0KtLDwgnbe8aiu7Ww/3vl9RiT0btHYAABorGoMAAAAAAABAI+C225X9wfsq/GGVObRYFDXxVkVPvFWW8xphVyOHw6FNm9bq2LHDpszX10/Dho1W27btTVml067FG/+lHfYsU9az0qIHhj6ugIiEBqkZAICmgMYgAAAAAAAA4GWV6enKeONV2dNPmTKfqCjFP/iwgjp38UJlnldQkK/Vq1eqsDDflEVHt9CoUeMVGhpmyooqCvXahn8o1Sg3ZaMdgbp97G9l8w9pkJoBAGgqaAwCAAAAAAAAXmIYhgpX/6Ds99+V4XCY8pABAxU34wHZgoO9UJ3nHT16SJs2rZXT6TRlXbp018CBQ01Lh0rS6YJUvbL1VeVb3dUetxqGptgSdO2E/5DFyluhAADw0xAAAAAAAADwAldJic4sekulKdtNmcXPTzH3TFX4yNGynLdP3tXI6XRq8+Z1OnLkoCnz9fXV0KGj1K5dxxqfuy99m+bvf18V1uqP+7vcuj+ij3oNvK9ZvIYAANQGjUEAAAAAAADAw8oOHtCZea/LmW9eLtOvVWslPPSo/BMTvVCZ5xUWFmj16u9UUGB+LSIjozV69HiFhYXX+Nw1B7/UslM/yG2t3viLcLj0cNsb1KbrhAapGQCAporGIAAAAAAAAOAhhsul3M8/Ud4XKyTDMOUR4yaoxR13yurr54XqPO/YsSPauDG5xqVDO3fupkGDhspmM7+F6Tbc+jhlob4vOCCdNxuwdaVbD/eeqciWvRusbgAAmioagwAAAAAAAIAHOHKylfHm66o4esSU2UJCFffAbIX06ev5wrzA5XJq8+b1Onz4gCnz8fHR0KGj1L79NTU+1+6ya8HGf2lXZZYp610hzRz2KwVEtKz3mgEAuBrQGAQAAAAAAAAaWPHmTcpcslDu8nJTFtStu+JnPySfiAjPF+YFRUWFWr16pfLzc01ZRESURo8er/DwiBqfW1heqFc3/kNphvl1HGsP0G1jfitbYGh9lwwAwFWDxiAAAAAAAADQQNwVFcp69x0VrVtjDm02tbhtiiKvv1EWq9XzxXnBiRNHtWFDshwOhym75pouSkoaLh+fmt+yPFWQqle3vaYCi6va41bD0B2WOI2a8J+y1LDsKAAA+D/8pAQAAAAAAAAaQEXqCWW88ZocmWdMmW9MrOLnPKLADh28UJnnuVwubd26QQcP7jNlPj4+Gjx4hDp27HzB5+9J36q3DixTZfXtBBXgcuuBsF7qkTRDlvP2GgQAAGY0BgGgCfmv//ovffzxxxfM7777bj3zzDMerAgAAAAAcD7D7VbBym+V/eEHkstlykO
HDlPctOmyBgR6oTrPKy4u0urVK5WXl2PKwsMjNXr0eEVERF7w+T8e/FIfnvpB7vMaf1EOlx5ufZ1adb++3msGAOBqRWMQAJqgESNGKCYmxvR4v379vFANAAAAAOAsZ2GhziyYp7I9u02ZxT9AcffNUNjQYV6ozDtSU49r/frVcjjspqxDh04aPHiEfH19a3yu23Drw5RF+rFgv3ReU7BtpUsP9ZyuiNZ9G6JsAACuWjQGAaAJeuihhzR48GBvlwEAAAAAOEfpnt06M/9NuYqLTFlA+w6Kn/OI/GJjvVCZ57lcLm3fvkn79+8xZTabTYMHj9A113S54PMrXXa9tfFf2lOZZcr6VEgzh/xK/lGt6rVmAACaAxqDAAAAAAAAQB24HQ7lfrRc+d99Yw4tFkXecJNaTJosi0/zeCuupKRYyckrlZOTbcrCwsI1evQERUZGXfD5+RUFem3DXJ0yyk3Z+Ep/3Trmt7IFhtVrzQAANBfN47cRAAAAAAAAoAHYz5xRxhuvqvJkqimzhUco4cGHFNStuxcq8460tBNat+5H2e3mpUPbt79GQ4aMvODSoZKUVpCqV7e9pkJL9b0ZbYahOxWjERP+UxYfv3qvGwCA5oLGIABcRElJidatW6dNmzZp3759OnHihIqLi+Xv76/Y2Fj17t1bEydO1MiRI2U5b7+DhvTdd9/pu+++k91uV0JCgoYPH67evXt7bHwAAAAAaO4Mw1DRurXKevdtGZWVpjy4dx/FPTBbPqHNY2ab2+3W9u2btW/fLlNmtdqUlDRMnTp1vejfzrvSt2rBgWWyn3dIoMutB0K6q/uQ+2WxWOu7dAAAmhUagwBwAQsWLNDcuXNVWcMfeE6nU8ePH9fx48f16aefauDAgfrb3/6mxMREj9S2ZMmSavdffPFFjR49Ws8//7wiIiI8UgMAAAAANFeusjJlvb1IxZs3mTKLj49a3HWPIsaM8+gHSL2ptLREycmrlJ2dacpCQ8M0evR4RUW1uOg5vj/0hT5K+1HGea9ZtN2lh1uNU8ueN9VnyQAANFs0BgHgAo4fP17VFIyLi9OwYcPUo0cPRUdHq7KyUjt27NBnn32msrIybd26VdOnT9eyZcsUHR3dYDV17dpVTz31lIYMGaKEhATl5eVp8+bN+sc//qHVq1frkUce0dKlS2W18glKAAAAAGgI5UePKOPN1+TMyTFlfgmJSnjoUfm3bu2Fyrzj1KmTWrv2B9nt5g/Vtm3bQUOHjpKf34WX/nQbbn2QskjJBful85qC7SqceqjnfQpv07/e6wYAoLmyGIZheLsIAGiMnnrqKZ06dUqzZs3S0KFDa2y2paena/bs2Tp+/LgkacqUKXr22WdNxz3xxBPatcu8nMrFTJgwQY8//nitjs3MzNStt96qgoICvfjii7rxxhsva6z6kp9fJqfTdekD0WRERQXLZrPK5XIrL6/U2+XgKsK1hYbAdYWGwHWFhsK11fQYbrfyvlyh3M8+kdxuUx4+6lrF3H2vrP7+ni/uf3nyunK73UpJ2aK9e3eaMqvVqoEDh6pLl+4XnTVZ4azQ/E0va19llinrV27oviG/UEB023qtu6ny8bEpMjLI22UAAK4CzBgEgAv41a9+dcllOVu2bKkXX3xRkyZNkiR99dVXevLJJxUYGFjtuIyMjKrmYW1lZ2fX+ti4uDhNmTJFb731lpKTk73WGAQAAACAq5EjL09n5r2u8kMHTZk1KEhxM2cpdMBAL1TmHWVlpUpOXqWsrDOmLCQkVKNHj1d0dMxFz5FXnq9XN/1Tp91lpmxChZ9uufY3sgVH1FfJAADgf9EYBIALqO1efV27dlX79u11/PhxlZeXKzU1VV27dq12zPl7AjaEdu3aSZKyssyftAQAAAAAXJmSlG06s/AtuUvNM/ACO3VW/IMPy7cBt5RobNLT07R27Q+qrKwwZW3atNOwYddedOlQSTqZf0Kvbn9dRZbqK87YDEN3u6M17LpfyeLjvZm
XAABczWgMAkA9CAkJqbp9dl9CTyssLJQk02xFAAAAAMDlc9vtyn7/XRWu/sEcWiyKvvU2Rd18iyzNZI93t9utnTu3affuFFNmtVo1YMAQde3a46JLh0rSjtNbtXD/MjnOOyzI5dYDQV3VbdgsWSzN4zUFAMAbaAwCQB3Z7XadOHGi6n5iYqLHazAMQ99++60kqWfPnh4fHwAAAACuJpXpp5Tx+quyn043ZT5R0UqY87ACO3X2QmXeUVZWpjVrVikzM8OUBQeHaPTo8WrRIvai5zAMQ6sOf6lP0lbLOK8p2MLu1EOJY9Sy9y31WTYAAKgBjUEAqKMVK1aouLhYktSjRw/FxFx8H4UrtW/fPh09elTXX399tWVZSkpK9Oyzz2r37t0KCgrS7bff3iDjAwAAAMDVzjAMFf6wStnL3pPhdJrykIGDFDf9ftmCg71QnXdkZKRrzZrvVVFRbspat26rYcOulb//xZf9dLlden/HIq0rOCCd1xTsUOHUg93vVXi7QfVZNgAAuACLYRiGt4sAgKYqLy9PEydOVG5uriTp5Zdf1oQJExpkrJUrV+rnP/+5wsPD1bNnT0VGRionJ0f79+9XYWGhgoKC9OKLL2r06NENMn5tuFxur42NhmG1WmSxWGQYhtxufmVA/eHaQkPgukJD4LpCQ+HaanwcRUU69u9Xlb9lqymz+vur3ewHFDNu7CWXyvSm+ryu3G63Nm3apE2bNtYwjlUjRoxQv379L/l6lDnK9Y+Vf9fe0jOmbGC5W4+O/42CEjrWqdbmwmZjiVUAQN3RGASAK2S32/XAAw9o69af/mgcP368/v3vfzfYeGlpaVq0aJF2796t9PR0FRQUyNfXVy1bttSwYcM0Y8YMtWrVqsHGBwAAAICrVcGu3To891+y5+WZsuD27dX5N/+poGb091Zpaam+/PJLnTx50pSFhoZq4sSJtdpGI6skR//vm+d02llqym6q9NW0256Sb1h0vdQMAABqh8YgAFwBt9utJ554Qp9//rkkqU2bNlq+fLnCw8O9XJl3MWPw6sMn2dFQuLbQELiu0BC4rtBQuLYaB7fTqVPvL9Ppjz6RaniLLH7izWozfZqsvr6eL+4K1Md1lZaWpq+++lJlZWWmrH379rruuusVGBh4yfMcyTmq53/8p4rlqva4j9vQNEsLXTfp97L6Xfo8+D/MGAQA1AcagwBwmQzD0JNPPqlly5ZJkhITE7VkyRJm60nKzy+T0+m69IFoMqKigmWzWeVyuZWXZ/6UL3CluLbQELiu0BC4rtBQuLa8z56dpTNvvqaKY8dMmS00VHEPPKiQ3n28UNmVq8t1ZRiGdu9O0c6d23T+24UWi0X9+g1Sjx59arWUasrprVq0f5kc5x0a7HRrVmAndRk+RxYrTa7L4eNjU2RkkLfLAABcBXy8XQAANCWGYehPf/pTVVMwPj5eixYtoikIAAAAAE1I0aaNylqyUO6KClMW1L2H4mfNkU9EhOcL85KKinKtWfODMjJOmbKgoGCNHDlOcXHxlzyPYRj67vAX+vRUsnReUzDW7tRDCdcqoc+t9VU2AAC4AjQGAaCWDMPQ008/rffee0+SFBcXp8WLF6tNmzZergwAAAAAUBvuinJlLX1bRevXmUObTS2m3KHICdc3q9lsmZkZSk5epfJy89KhiYmtNGLEGAUEXHrJT5fbpfd2LtL6/AOm7Jpyh2Z3vUdhHQfXS80AAODK0RgEgFo42xR89913JUmxsbFavHix2rZt6+XKAAAAAAC1UXHiuDLeeE2OrExT5hsbp4SHHlFAu/ZeqMw7DMPQ3r07lZKypcalQ/v2HaiePfvWaunQMke53tzybx2qyDJlg0pdmjr4F/KL7VhvtQMAgCtHYxAALuH8pmBMTIwWL16sdu3aebcwAAAAAMAlGW638r/7RjkfLZdc5j3Rw4YNV+zU+2Stxay4q0VFRYXWrftB6elppiwwMEgjR45VfHxirc6VXZarV7e8pEy
XecbhTWU23XDtE7KFtqhzzQAAoH7QGASAS3jmmWdMTcH27ZvPp0gBAAAAoKlyFhbozFvzVLZ3jymzBgQodvpMhQ0e6oXKvCcr64ySk1eprKzUlCUktNSIEWMUGBhUq3Mdyz+m11PeVImqN1x93IbudYZr8HWPy+LXfBquAAA0BTQGAeAi/vznP2vp0qWS/q8p2KFDBy9XBQAAAAC4lJJdO5W5YJ5cxcWmLKBDB8XPeUR+MbFeqMw7DMPQvn27tX37JtPSoZLUp88A9erVT9Za7q+4LX2zFh9YLud5K42GOF2a7X+NOo2dI4uVtx4BAGhs+OkMABcwd+5cvf3225J+2l9hxowZOnbsmI4dO3bR53Xv3l2JibVbcgUAAAAAUL/cDodyPvxABSu/NYcWi6JuvFnRt94mi0/zeVussrJS69b9qFOnUk1ZQECgRo4cq4SElrU6l2EY+vrwF1pxKlk6rykYV+nUQ3EjFNdvcq32JgQAAJ7XfH4DAoDLtH379qrbhmHohRdeqNXznn32WU2ZMqWhygIAAAAAXIA947Qy3nhNlWknTZktIkIJsx9SULfuXqjMe3JysrR69UqVlpaYsri4BI0cOU5BQbVbOtTpdmrpzsXalH/AlHUqc2hW1zsVds2wOtcMAAAaDo1BAAAAAAAANGmGYahobbKy3n1Hht1uyoP79lP8zFmyhYZ6oTrvMAxDBw7s0bZtm+R2u01579791bt3/1ovHVrmKNMbW17R4YosUzakxKm7kx6VX3znOtcNAAAaFo1BALiAJUuWeLsEAAAAAMAluMpKlbl4oUq2bjFlFh8fxdx1j8LHjGtWS1tWVFRo9ervdPLkCVPm7x+gkSPHKjGxVa3Pl12Wq1e2vKQsV1m1xy2GoZtLrbpu9H/JFt589msEAKApozEIAAAAAACAJqn88GFlzHtNztxcU+aX2FIJDz0i/1atvVCZ92RmZuqzzz5TUVGRKYuNjdeoUeMUFBRc6/MdyT+q11PmqUyuao/7ug1NdYRp0PW/lsW/9ucDAADeRWMQAAAAAAAATYrhdivvi8+V+9knkmGY8vDRYxRz1z2y+vt7vjgvMQxDO3fuUHJyslwulynv2bOv+vYdWOulQyVpc/pmvX1guVznTbYMdbo027e9rhnziCw23l4EAKAp4Sc3AAAAAAAAmgxHXq7OzHtD5YcOmjJrULDi7p+l0P4DvFCZ99jtldqwYY1SU4+ZMj8/f40YMUatWrWp9fkMw9CXh1boy/Q10nlNwfhKh+bEDFPcgDua1fKsAABcLWgMAgAAAAAAoEko3rZVmYsWyF1WasoCO3dR/IMPyzcqyguVeU92dpbWrFmlkpJiUxYTE6dRo8YpODik1udzuJ16Z+cibck3N167lNn1QOfbFdp5ZJ1qBgAA3kNjEAAAAAAAAI2au7JS2e+/q8LkH82h1aroW29T1E0TZbmMZTKbOsMwtHfvLqWkbJZRw3Kq3bv3Vv/+SZe1dGiJo1RvbHlVRyuyTNnQYofuTHpE/gld61Q3AADwLhqDAAAAAAAAaLQq09KU8carsmecNmU+0dFKmPOIAq/p5IXKvKe8vFzr1v2o06fTTFlAQIAmTLhOkZHxl3XOrLJsvbLl38p2lVV73GIYuqXEognX/k7W8Ms7JwAAaHxoDAIAAAAAAKDRMQxDBT+sUs6y92Q4naY8dFCSYqfPlC0o2AvVeU9GRrrWrv1B5eVlpiwxMVE333yzgoNDlJdnXm71Qg7nH9EbKfNVJle1x/3cbk2rDNWA6x+XJaD2y5ECAIDGi8YgAAAAAAAAGhVXaanOLJyv0pTtpszi56fYqfcpbPhIWSwWL1TnHW63W7t2bdeuXebXRJJ69eqnMWNGydfXRy6Xu9bn3ZS+Se8c+FCu817KMKdLs33aqOMNP5PF5luX0gEAQCNCYxAAAAAAAACNRvnRI8p4/VU583JNmX+btkp46BH5xSd4oTLvKS0t0Zo13ysr64wpCwwM1Ig
RY5WQ0PKy9hM0DEMrDn2ur9PXSuc1BRMrHHqwxWDFDrq7WTVfAQBoDmgMAgAAAAAAwOsMt1v5X3+pnE8+ktzmGW8RE65Xiyl3yOrbvGavpaWlat26H2W3V5qyxMRWGj58jAIDAy/rnA6XQ0t2LdK2/EOmrFupXfdfM0kh3cZccc0AAKDxojEIAAAAAAAAr3IWFurMW2+qbO8eU2YNCVH8rDkK6d3HC5V5j8vl0vbtm7R/v/k1sVgs6tdvkHr06HPZM/qK7SV6fdtrOl6eZcpGFNl1+8AH5deq5xXXDQAAGjcagwAAAAAAAPCasv37lDHvdbkKC01ZYOcuip/ziHwjI71QmfcUFRUqOXmV8vJyTFlISKhGjhyrmJi4yz7vmdIsvbL1FeW6yqo9bjEMTSqWxo5+QrbIxCuuGwAANH40BgEAAAAAAOBxhsul3M8/Ud4XKyTDqB5aLIqaeKuib5kky2Xsm3c1OHbsiDZuXCOn02HK2rRpr2HDRsnPz/+yz3sg75Dm7VigcrmqPe7vduu+8iD1vf43sgaGXXHdAACgaaAxCAAAAAAAAI9y5OXpzJuvqfyweY87W3iEEuY8rKCu3bxQmfc4HA5t2bJeR44cNGVWq02DBg1V587dLnvpUElaf2qD3j34sdznPTXc4dKD1pZqf+MvZPHxu9LSAQBAE0JjEAAAAAAAAB5TsiNFZxbMk7u01JQF9eyt+NkPyie0ec1cy8/PU3LyShUWFpiy8PAIjRo1TpGR0Zd9Xrfh1meHPtN36eul85qCrSocejBygFoMmSqLpXnNygQAoDmjMQgAAAAAAIAGZzidyl6+TAUrvzWHNptaTL5dkdfd0KyWDjUMQ4cP79eWLRvkcrlMeceOnZWUNFy+vr6XfW67y6HFOxcqpeCwKetRUqnpHW9RaI/xV1Q3AABoumgMAgAAAAAAoEHZs7KU8carqjxx3JT5tGihhDmPKLDjNV6ozHvs9kpt2LBGqanHTJmPj6+GDBmhDh06XdG5CyqK9OKmF5VakW3KRhVWavLAB+XXutcVnRsAADRtNAYBAAAAAADQYIo3b1Lm4gVyV1SYspABAxU38wHZgoK9UJn3ZGdnac2aVSopKTZlUVHRGjVqvMLCwq/o3KcKM/SXVS8o11F9qVarYei2QreuvfY3skW1vqJzAwCApo/GIAAAAAAAAOqdu7JS2e8vVWHyalNm8fFRzN1TFX7tGFkslhqefXUyDEN79+5SSspmGYZhyrt27akBAwbLZrNd0fl3ndmnueteU4VRfVlSf5db08sD1eeGx2UNiriicwMAgKsDjUEAAAAAAADUq8rT6cp47RXZT6ebMt+4eCU+8jP5t27jhcq8p7y8XOvW/ajTp9NMmZ+fv4YPH63Wrdtd0bkNw9CaU+v0weHP5D4vi3C49KAS1O7Gx2Tx9b+i8wMAgKsHjUEAAAAAAADUC8MwVLRujbKWvi3DbjflYUOHK3badFkDArxQnfdkZKRr7dofVF5eZspiY+M1cuRYBQeHXNG57S673t37vjbn7DZlrSscmh3eRy2GzpDFar2i8wMAgKsLjUEAAAAAAADUmbuiXJlLFql400ZTZvH3V9y0GQobNtwLlXmP2+3Wrl3btWvX9hrzXr36qU+fAbJeYdMuuyxXb6a8qfTKPPO5Syp0X/ubFNLrhis6NwAAuDrRGAQAAAAAAECdVKSeUMbrr8qRlWnK/Fq1VuIjP5NffIIXKvOe0tISrVnzvbKyzpiywMBAjRgxVgkJLa/4/Luy92nRnrdVYThN2ZjCCt3a7wH5tet3xecHAABXJxqDAAAAAAAAuCKGYahg1UrlLH9fhtPcoAofM1Yxd90jq6+fF6rznrS0VK1b96Ps9kpTlpjYSsOHX6vAwKArOrfbcGvFkS/1TVqyKfNzu3V3sVXjb/5vlfjFXtH5AQDA1Y3GIAAAAAAAAC6bq6REZxbOV+mOFFNmDQxU3P2zFDpgkBcq8x6Xy6Xt2zdp//4
9psxisahfv0Hq0aOPLBbLFZ2/xF6qt3Yu0MHik6Ys1u7UbJ8E9b7vt5J/sJRXekVjAACAqxuNQQAAAAAAAFyW8iOHlfHGq3Lmmfe2C+jQQQlzHpVvTIwXKvOeoqJCJSevUl5ejikLCQnVyJFjFRMTd8XnTy1K05sp85XvKjNlvYsrdG/iKLWdcJ9svr5yudxXPA4AALi60RgEAAAAAABArRhut/K//lI5n3wkuc3Np8jrb1SLybfL4tO83nI6duyINm5cI6fTYcratGmvYcNGyc/P/4rObRiG1qZv1AeHPpFLRrXMahi6qcCu8QNnybdtX1mstisaAwAANB/N67c0AAAAAAAAXBFnYaHOzH9DZfv2mjJbSKjiZ89RcK/eXqjMexwOh7ZsWa8jRw6aMqvVpkGDhqpz525XvHSo3eXQe/ve16bsXaYs1OnSfWWB6j7ht7KGsZ8gAACoHRqDAAAAAAAAuKjSfXt1Zt7rchUVmbLALl2VMOdh+UREeqEy78nPz1Ny8koVFhaYsvDwCI0aNU6RkdFXfP6c8ly9kTJP6RW5pqx9uV3Tg7oqduIsWXz8rngMAADQ/NAYBAAAAAAAQI0Ml0u5n32ivC9XSEb1ZSxlsSj6lkmKmnirLFardwr0AsMwdPjwfm3ZskEul8uUd+zYWUlJw+Xr63vFY+zO3qdFe95WueE0ZSMLyjWp62QFdBtzxTMRAQBA80VjEAAAAAAAACaOvFydefN1lR8+ZMpsERFKmPOIgrp09UJl3mO3V2rDhjVKTT1mynx8fDVkyAh16NDpis/vNtz64shX+jpttSnzc7t1Z6GhwSN/JVtshyseAwAANG80BgEAAAAAAFBNyY4UnVkwT+7SUlMW3Ku34mY9KJ/QMC9U5j3Z2Vlas2aVSkqKTVlUVLRGjRqvsLDwKz5/iaNUC3Ys1IHiVFMWa3dqpjtGbW7+hawBoVc8BgAAAI1BAAAAAAAASJLcDodyPlymgpXfmUObTS2m3KHICdc3u6VD9+7dpZSUzTLOX05VUteuPTVgwGDZbLYrHiO1KE1vpsxXvqvMlPUurtC9CSMUOvCOZvW6AwCAhkFjEAAAAAAAALJnZirjjVdVmXrClPm0aKGEhx5VYIeOni/Mi8rLy7Vu3Y86fTrNlPn5+Wv48NFq3brdFZ/fMAytS9+oZYc+kUvVm45Ww9CNBZWaMOB++bbrf8VjAAAAnIvGIAAAAAAAQDNXtHmjshYvlLuiwpSFDBiouJkPyBYU7IXKvCcjI11r1/6g8nLzLL6YmDiNGjVOwcEhV3x+u8uh9/ct08bsnaYs1OnSfWUB6j7+N7KGx13xGAAAAOejMQgAAAAAANBMuSsrlfXeOypak2zKLD4+irlnqsJHj5HFYvFCdd7hdru1c+c27d6dUmPeq1c/9ekzQNY6LOuZU56rN1Pm6VRFrilrV27XjMDOip04WxYf/yseAwAAoCY0BgEAAAAAAJqhyvR0Zbz+iuyn002ZX3yCEh7+mfxbt/ZCZd5TWlqiNWu+V1bWGVMWGBioESPGKiGhZZ3G2JOzXwt3L1G54TRlIwrKdVuXSQroPq5ZNWMBAIDn0BgEAAAAAABoRgzDUNGaZGW9944Mu92Uhw0bodhp02X1b16z1dLSTmjdutWy2ytNWWJiKw0ffq0CA4Ou+Pxuw60vj36tr07+aMr83G7dUWhoyMj/lC22ee3jCAAAPIvGIAAAAAAAQDPhKi9X1pKFKt68yZRZ/P0Vd98MhQ0d7oXKvMflcmn79k3av3+PKbNYLOrXb5B69OhTpxl8JY5SLdy5UPuLUk1ZjN2pma5otb3pMVkDw654DAAAgNqgMQgAAAAAANAMVJw4oYzXX5EjO8uU+bdurYSHfya/+AQvVOY9RUWFSk5epby8HFMWEhKqkSPHKiYmrk5jnCw6pTd2zFO+s8yU9Squ0L3xwxQ26E5ZrLY6jQMAAFAbNAYBAAAAAACuYoZhqGDVd8r+4H3J5TLl4WP
GKeauu2X19fNCdd5z7NgRbdy4Rk6nw5S1adNew4aNkp9f3ZZTXXdqo5Yd+lhOGdUetxqGbsyv1PgBM+TXfmCdxgAAALgcNAYBAAAAAACuUq6SEp1ZOF+lO1JMmTUoSHEzZyl0QPNqTDkcDm3Zsl5Hjhw0ZVarTYMGDVXnzt3qtHSo3eXQ+/uWaWP2TlMW6nRpWqm/ekx4XNbw+CseAwAA4ErQGAQAAAAAALgKlR8+pIw3X5MzL8+UBXToqISHHpFvixgvVOY9+fl5Sk5eqcLCAlMWHh6hUaPGKTIyuk5j5JTn6c2UeTpVYV6etF25XTMCr1HsxDmy+NZtNiIAAMCVoDEIAAAAAABwFTHcbuV99YVyP/1YcrtNeeQNN6nFbVNk8Wk+bwsZhqFDh/Zr69YNctWwnGrHjp2VlDRcvr6+dRpnT85+Ldr9tsoM8/KkIwrKdVunWxTQc0KdZiMCAADURfP5DRAAAAAAAOAq5yws0Jl5b6ps/15TZgsNVfzsOQru2dsLlXmP3V6pDRuSlZp63JT5+PhqyJAR6tChU53GcBtufXn0G3198ofzdhOU/Nxu3VHg1pAR/yFbfN3GAQAAqCsagwAAAAAAAFeB0r17dGbeG3IVF5mywK7dlPDgQ/KJiPRCZd6TnZ2lNWtWqaSk2JRFRUVr1KjxCgsLr9MYpY4yLdi5UPuLTpiyGLtTM51RanvTY7IG1W0cAACA+kBjEAAAAAAAoAkzXC7lfvqx8r76QjLOm69msSj61tsUdfMtslit3inQCwzD0N69u5SSslnG+a+JpK5de2rAgMGy2Wx1Gudk8Sm9mTJfec5SU9arpEL3xAxR+OC7ZbHWbRwAAID6QmMQAAAAAACgiXLk5irjjVdVcfSIKbNFRCjhoUcV1LmLFyrznvLycq1b96NOn04zZX5+/ho+fLRat25X53HWpW/SsoMfyXne4qFWw9AN+ZWa0O8++XVMqvM4AAAA9YnGIAAAAAAAQBNUsiNFZ96aJ3eZebZacO8+in/gQdlCQ71QmfdkZKRr7dofVF5eZspiYuI0atQ4BQeH1GkMh8uh9/cv14asFFMW4nRpWqmfeo7/tawRCXUaBwAAoCHQGAQAAAAAAGhC3A6HcpYvU8Gq78yhzaaY2+9UxITrZbFYPF+cl7jdbu3cuU27d5ubdZLUq1c/9ekzQNY6LqeaW56nN1Lm6VRFjilrW27XDP8Oipv4kCy+AXUaBwAAoKHQGAQAAAAAAGgi7JmZynj9FVWeTDVlvi1ilPDwowpo38ELlXlPaWmJ1qz5XllZZ0xZYGCgRowYq4SElnUeZ2/uQS3ctVhlhsOUDS8o1+RrblZAr+bVkAUAAE0PjUEAAAAAAIAmoGjTRmUtWSh3RYUpCxmYpLgZ98sWFOSFyrwnLe2E1q1bLbu90pQlJrbS8OHXKjCwbq+J23Dr62Pf6svU78/bTVDyc7t1e4FLQ0Y8Jp/4znUaBwAAwBNoDAIAAAAAADRi7spKZb37jorWJpsyi6+vYu6ZpvBRo5vVTDWXy6Xt2zdp//49psxisahfv0Hq0aNPnV+TUkeZFu5arH2Fx0xZjN2pGc5ItbvpMVmDIuo0DgAAgKfQGAQAAAAAAGikKtNPKeP1V2Q/fdqU+cUnKOGRn8m/VWsvVOY9RUWFSk5epbw88z5/wcEhGjVqnGJi4uo8Tlpxut5Imac8Z6kp61FSoaktkhQ+5B5ZrLy9BgAAmg5+cwEAAAAAAGhkDMNQ4ZrVyn73HRkO8552YcNHKnbqfbL6+3uhOu85duyINm5cI6fT/Jq0adNew4aNkp9f3V+T9ac36/0DH8p53uKhVsPQDfkVmtD3PvldM7jO48A7DMPQ6dwylVc61SExTNZmNNsWAAAagwAAAAAAAI2Iq7xcWYsXqHjLZlNm8Q9Q3PQZChsyzAuVeY/D4dDmzet09OghU2a12jRo0FB17tytzkuHOlwOLTvwodZnbjdlIU63ppX4qMe4/5ItsmW
dxoH3FJfZtfCrA0o5/NOM06RusXr41h7NaileAEDzRmMQAAAAAACgkag4cVwZr78iR3a2KfNv3UYJD/9MfvHxXqjMe/Lz85ScvFKFhQWmLDw8QqNGjVNkZHSdx8ktz9ebO+Yprdz82rctt2uGXzvFTXxYFr/AOo8F79h/Ik9vrtinghJ71WOb92fpvuu6KCTQ14uVAQDgOTQGAQAAAAAAvMwwDBWs/FbZy5dJLpcpjxg7Ti3uvFtWXz8vVOcdhmHo0KH92rp1g1w1vCYdO3ZWUtJw+frWvaGzL/egFuxarDLDvETpsIJyTe54gwJ738SssibK6XLrkzXH9dXG1PMWh5XCg/3k52P1Sl0AAHgDjUEAAAAAAAAvcpWU6MyCeSrducOUWYOCFHf/bIX2H+D5wrzIbrdrw4ZkpaYeM2U+Pr4aMmSEOnToVOdx3IZbXx/7Tl+mrjI1jHzdhu4ocGjIsJ/LJ7FrnceCd2Tll+n1z/bpeEaRKQsN8tWjt/WUn6/NC5UBAOAdNAYBAAAAAAC8pGj/fqX+/UU58/NMWUDHa5Tw0CPyjW7hhcq8JycnS8nJq1RSUmzKoqKiNWrUeIWFhdd5nDJHmRbuWqy9hebmYwu7UzMc4Wp/43/IGhxZ57HgHRv2nNGSbw+qwm6ecdqjXaQenNhd4SH+XqgMAADvoTEIAAAAAADgYYbLpbQPP9LJd9+X3G5THnnjzWoxabIsPs3nrRvDMLR//25t375Z7hpek65de2rAgMGy2eo+uyutOF1v7pivXEeJKetRUqGp0QMVPmSqLLbm8/pfTcornVry7UFt3JtpymxWi24f3VHXJbWWlaVhAQDNEL/dAAAAAAAAeJCzsED7/zVfRbt2mzJbaKjiZz+k4J69vFCZ91RUVGjduh+Vnn7SlPn5+WvYsNFq06ZdvYy1/vRmvX/gIzlVvfloMQzdkF+h6/pMlV+nofUyFjzv6OlCvfHZXmUXVJiyuMhAPTyph9rFh3mhMgAAGgcagwAAAAAAAB5gGIaKt2xS9tJ35KphmczArt2U8ODD8omI8HxxXpSZmaE1a75XWVmpKYuJidPIkeMUEhJS53EcLoc+OPCh1mVuN2UhTremltjUc+zvZItqVeex4Hlut6GvNqXqkzXH5XKfv2OkNKJXgqZO6KQAP94OBQA0b/wkBAAAAAAAaGDOgnxlvr1YpTtSzKHFouhJkxV100RZrFbPF+clbrdbe/bs0M6d22QY5kZOz5591bfvQFnr4TXJLc/XvB3zdbI8y5S1KXdopm8bxU18RBa/wDqPBc/LL67Um5/v1YGTBaYs0N9HM2/ooqRucZ4vDACARojGIAAAAAAAQAMxDENFa5OVvew9ucvLTblfVJRiH3xYQZ27eKE67ykrK9Patd/rzJnTpiwgIFAjRoxRYmL9zNzbn3tIb+1epDK3w5QNKyjTbe1vUFDfm2Vhv7kmKeVQtt76cr9KK5ym7JqW4Xrolu5qEUHDFwCAs2gMAgAAAAAANABHTrYyFy1U2f69NebRQwer3cMPqdhp83Bl3nX69CmtXfuDKirMjdL4+ESNGDFWQUFBdR7Hbbj1zfGV+uLESp0/H9HXbej2fIeGDvuZfFp2r/NY8Dy7w6X3vz+iH1LSTZnFIt0yrJ1uGd5OtmY0CxcAgNqgMQgAAAAAAFCPDLdbBT+sUs5Hy2VUVppyW2iYOjz8oGJHDpfL5ZbyzHvrXY3cbrd27NiqPXt2mDKLxaI+fQaoZ8++9bJ0aJmjTIt2L9GegqOmrIXdqRn2MLW/8T9kDYmq81jwvFNZJXr9s71KzzH/24kK89dDt/RQ59YRni8MAIAmgMYgAAAAAABAPbGfyVDmogUqP3yoxjx06DDF3j1V0W2a135nJSUlWrNmlbKzM01ZUFCwRo4cq7i4hHoZK634tN7cMV+5jmJT1qOkQvdG9lfEhPtksfG2WFNjGIa+356u978/IqfLbcoHdonRzBu7KjjA1wvVAQDQNPA
bEAAAAAAAQB0ZLpfyv/1auZ9+LMNp3uvMJzJKsdNnKqR3Hy9U511paSe0bt1q2e3m2ZMtW7bR8OHXKiAgoF7G2nB6i9478KGcqt40shiGbsiv0HW975Ff5+H1MhY8q7jMrgVfHtCOIzmmzM/XqqnjO2tk7wT2igQA4BJoDAIAAAAAANRBZVqaziycr8rUEzXm4aOvVYs77pYtMNCzhXmZy+XS9u2btH//HlNmtVrVv3+SunXrVS+NHIfbqQ8OfKh1Z7aZsmCnW1OLreo15gnZolvXeSx43r4TeXpzxT4VlthNWZvYED08qYcSooO9UBkAAE0PjUEAAAAAAIArYDidyv3ic+V9uUJyuUy5b0yM4mbOUlDXbl6ozruKigqVnLxKeXnm2V0hIaEaNWqcWrSIrZex8iry9eaO+TpZlmXK2lQ4NMPWWvETH5bFn8ZRU+N0ufXxmmP6euNJGTXkEwa21h3XdpSvT933pQQAoLmgMQgAAAAAAHCZKo4f05mFb8mefsocWiyKGH+dWtw2RVZ/f88X52XHjx/Rxo1r5HA4TFnbth00dOgo+fn51ctY+/MOacGuxSp1m2eSDS0s1+S2ExTUb6IsFhpHTU1mfpne+GyvjmeY94oMDfLV7Ju7q3fHaC9UBgBA00ZjEAAAAAAAoJbcdrtyP/1I+d9+IxnmOUx+CYmKu3+WAjte44XqvMvpdGrz5vU6cuSAKbNabUpKGqpOnbrVy9KhbsOtb46v0hcnvjPNJPN1G7o936GhQx+RT6sedR4LnmUYhtbvOaO3vzukSrt5Jm6P9lF68OZuCg9pfk13AADqA41BAAAAAACAWig7dFCZC9+SIyvTHFqtirrxZkVNvFVWX1/PF+dlBQX5Sk5eqYKCfFMWHh6hUaPGKTKyfmZ3lTnKtWj3Eu0pOGLKou1OzagMVYcb/0PWEGaTNTXllU4t+eagNu4z/xuzWS2649qOmjCotaz10FwGAKC5ojEIAAAAAABwEe6KcmV/+IEKf/i+xty/dRvFPTBbAW3aergy7zMMQ0eOHNTmzevkqmGfxY4dOyspabh866lZeqr4tN7YMV+5DvPykt1LKjU1orciJsyQxdb8mrNN3dH0Qr3+2V7lFFaYsrioID1yaw+1jQ/1QmUAAFxdaAwCAAAAAABcQOnePcpctEDOvFxTZvHxUdQtkxR1/Y2y+DS/t1gcDrs2blyr48fNM/d8fHw0ePAIdezYud7G25ixVe/tXy6H3NUetxiGrs8v1/U975Jf11H1Nh48w+029OXGVH2y5rjcNSzPO7J3gu4d30kBfs3v3xgAAA2Bn6gAAAAAAADncZWWKvv9d1W0fm2NeUCHjoq7f7b8ExM9XFnjkJubo+TklSouLjJlkZFRGjVqvMLDI+plLIfbqeUHPtLaM1tNWbDTranFFvW69reytWh+MzaburyiCs1bsU8HThaYskB/H828oYuSusV5vjAAAK5iNAYBAAAAAADOUZKyTZlvL5arsNCUWfz81GLy7YoYN0EWq9UL1XmXYRg6eHCvtm7dKLfbbcq7dOmugQOHyGarn7ec8isK9OaO+UotM+8517rCoRnWlkqY+Kgs/sH1Mh48Z/uhbC34cr9KK5ym7JpW4Xrolu5qER7ohcoAALi60RgEAAAAAACQ5CwqUtbSt1WydXONeWDXboqb8YD8YmM9XFnjUFlZqQ0bVuvkyROmzNfXT8OGjVLbth3qbbzdOfu0eM9SlbntpmxIQbmmtB2roP6TZLE0vwZtU1bpcOn974/ox5R0U2axSLcOb6+Jw9rK1gwb7wAAeAKNQQAAAAAA0KwZhqHizRuV9e47cpeUmHJrQIBa3HmPwkeNlsVi8UKF3pednank5FUqLTW/Pi1axGjkyHEKDQ2rl7Ecbqc+Pfy5fkjfYMp83Yam5Fdq2JCH5NO6V72MB89JyyrR65/t1emcUlMWHeavObf0UOfWEZ4vDACAZoTGIAAAAAAAaLYc+fnKWrJQpbt
21pgH9+qt2Okz5RsV7eHKGgfDMLR3706lpGyRYRimvHv33urXb5BsNlu9jJdVlq35OxfqVHm2KYu2OzWjIkQdbnhC1tCYehkPnmEYhlZtO6VlPxyV02VegnZg11jNvKGLggN8vVAdAADNC41BAAAAAADQ7BiGoaI1ycr+4D25y8tNuTU4WLH3TFPokKHNdpZgeXm51q37QadPnzJl/v7+Gj58jFq1alNv420+s13v7f9AlYbLlPUsqdA9Yb0UMX6mLD5+9TYmGl5RmV0LvtivnUdzTZmfr1VTx3fWyN4JzfbfGQAAnkZjEACaiE2bNmnGjBmXPG7w4MFavHixByoCAAAAmiZHdrYyFy9Q2f59NeYhAwYqdup0+YSHe7iyxuPMmdNas+Z7lZeXmbK4uASNGDFGwcEh9TJWpcuuZfuXa2PWDlPm4zZ0S16FRvW5W35dRtTLePCcvSfyNG/FPhWWmPeJbBMboocn9VBCdLAXKgMAoPmiMQgATUSLFi00efLkC+bfffedSkpKlJSU5MGqAAAAgKbDcLtV8P0q5Xz0gQy7uVFhCwtT7LQZCh0w0AvVNQ5ut1u7dm3Xrl3ba8x79+6v3r37y2q11st46SUZmr9zgTIrC0xZrN2paeXB6nDd47JGxNfLePAMp8utj5KP6etNJ2vMrxvUWreP7ihfn/q5jgAAQO3RGASAJqJjx4567rnnaswyMjL06aefymKxaNKkSR6uDAAAAGj87BmndWbhW6o4eqTGPGzocMXcfa9sIfUzC64pKisr1Zo13yszM8OUBQYGasSIsUpIaFkvYxmGoTXpG/ThoU/llHnvwoFF5ZoSM0hhE+6Vxca+c01JZl6ZXv9sr06cKTZlYUG+mj2xu3p1aJ57dgIA0BjQGASAq8Cnn34qt9utQYMGqXXr1t4uBwAAAGg0DJdL+d98pdzPPpHhdJpyn8goxc24X8G9enuhusbj1KmTWrfuR1VWVpiyhIRWGjFijAIDA+tlrDJHud7Z+6525B0wZX5ut6bk2zV00Cz5tOtXL+PBMwzD0Po9Z/T2t4dU6ahhn8gOUZp9c3eFB7NHJAAA3kRjEACuAp988okk6bbbbvNqHQAAAEBjUpl2UmcWzFflydQa8/DRY9Tijrtkq6eGV1PkcrmUkrJF+/btMmUWi0X9+g1Sjx59ZLFY6mW844WpemvnQuU5S01ZywqH7nNFqdWNP5M1hBllTUlZhVOLvzmgzfuzTJnNatGd13bU+EGtZa2n6wgAAFw5GoMAcBElJSVat26dNm3apH379unEiRMqLi6Wv7+/YmNj1bt3b02cOFEjR46stz+UL9fOnTt1/PhxBQYG6oYbbvBKDQAAAEBj4nY4lPfF58r76gvJZZ655BsTq7iZDyioazcvVNd4lJQUKzl5lXJyzM2c4OAQjRw5VrGx9bO3n9twa2Xqj/r82Ndy15CPKCjTra2vVdCAybJYbfUyJjzjSHqh3vhsr3IKzbNN46OC9PCtPdQ2PtQLlQEAgJrQGASAC1iwYIHmzp2ryspKU+Z0OnX8+HEdP35cn376qQYOHKi//e1vSkxM9HidZ2cLTpgwQSHNeD8UAAAAQJLKjx1V5sL5sp8+bQ4tFkWOv07Rt02R1d/f88U1Iqmpx7R+fbIcDrspa926nYYNGyV//4B6GavIXqxFu5boQNEJUxbkcuuuApf6Df+5fBKbd6O2qXG7DX2x4YQ+XXtCbsO8T+SoPgm6d1xn+fvR6AUAoDGhMQgAF3D8+PGqpmBcXJyGDRumHj16KDo6WpWVldqxY4c+++wzlZWVaevWrZo+fbqWLVum6GjPLXljt9v15ZdfSpImT57ssXEBAACAxsZdWancTz9W/nffSDU0KfwSExV3/2wFdujoheoaD5fLqa1bN+rgwX2mzGq1asCAIeratUe9rYhyIO+wFu5eomKXeTZZ+3K7plpbKn7iw7IGhtXLePCMvKIKvfn5Ph1MKzBlQf4+mnljVw3qGuv
5wmrJXe6Q/XCeDLtLfl2iZQtt3h8UAAA0LzQGAeACLBaLRowYoVmzZmno0KGyWq3V8smTJ+uhhx7S7Nmzdfz4cZ06dUp///vf9eyzz5rO9cQTT2jXLvOeHRczYcIEPf744xc95ocfflBBQYHi4+M1ZMiQyzo/AAAAcLUoO3hAmYsWyJGVaQ5tNkXdeJOibr5VVl9fzxfXiBQVFWj16lXKz881ZaGhYRo1aryio1vUy1gut0srjn6l79KSdX6b1mIYGptfrps63ST/3jfIYrHWeA40TtsOZmnhVwdUWuE0ZZ1aheuhW3ooOrx+Zps2BMepIpWtS5Pxv/U70ooUemtnWWxchwCA5oHGIABcwK9+9StFRERc9JiWLVvqxRdf1KRJkyRJX331lZ588kkFBgZWOy4jI0PHjx+/rPGzs7MveczZZUQnTZpkalwCAAAAVzt3Rbmyl3+gwh+/rzH3b9NWcffPUkCbth6urPE5evSQNm1aK6fT3Mxp3/4aDRkyQr6+fvUyVm55vhbsWqjjpRmmLMzp0j1FVvUc9WvZYjvUy3jwjEqHS++tOqzVO8zL9Fos0qTh7XXzsLayNdK/TQ2XW+VbM2Q/kFPtcXdRpQyHm8YgAKDZoDEIABdwqabgWV27dlX79u11/PhxlZeXKzU1VV27dq12zJIlS+q9vry8PK1Zs0aSdNttt9X7+QEAAIDGrHTPLmUuXihnXp4ps/j4KPrW2xR53Q2y+DTvtz4cDoc2b16no0cPmTKbzaakpOG65pou9bZ06I7sPXp7z7sqNxymrEtppe4NvEYtbp0ti19QvYwHzziZWazXP9urjNwyUxYd5q+Hbu2hTq0iPF9YLbnyK1S6JlXufPOStj7xwbKwDyIAoBlp3r8dA0A9CQkJqbp9dl/ChrZixQo5HA717dtXHTrwSVsAAAA0D66SEmUve1dF69fVmAd0vEbx98+SX0KihytrfPLzc5WcvEqFhQWmLCIiUqNGjVNERFS9jOVwOfTRoU+VnLHZlNkMQzfkVWhsz9vl33V0vTUh0fAMw9DKbaf0wQ9H5HSZ9+4c1DVWM2/ooqCAxrlMr2EYsh/MVfnW01IN9fu0DlPQsNayWLkmAQDNB41BAKgju92uEydOVN1PTPTMGxBnlxFltiAAAACai+Lt25T19iK5iopMmcXPTy2m3KGIseNlaaRLGXqKYRg6fHi/tmzZIJfLZco7deqqQYOGyaeeZlNmlmZp/s4FSq8w710Y5XBpWqmfOo37L9miWtbLePCMojK73vpiv3YdNX9f/Xytmja+s0b0Tmi0jV53hVNl69PkTDP/90I2i9QjRuXxwQr2Z7YgAKB5oTEIAHW0YsUKFRcXS5J69OihmJiYBh/z8OHD2rt3r/z8/HTTTTc1+Hi1FRbWeDeYx5Wx/u8nZ61Wi6Kigr1cDa4mXFtoCFxXaAhcV42DvaBAJ+a9pbz1G2rMw3r1VIdHH1FAfJyHK7tyDXVtVVZWauXKlTp82Lx0qJ+fn8aNG68uXbrU23irT2zQgm3vqtIwNyD7FFfovvgBSrxjtqy+/vU2Ji6svq6rlENZ+tf7O5RfbF4Rp0PLcD1+b3+1jA2p4ZmNQ/nJAmV9fViuUvOStj5RgUqpLNCxlctkkVOtuwzX7feP80KVAAB4B41BAKiDvLw8/f3vf6+6/+ijj3pk3I8//liSNHbsWIWHh3tkzNqwsVn7Vctischma5yfBEbTxrWFhsB1hYbAdeUdhmEoe3Wyjs97S87iElNuCwpSuwdmKG7C+EY7a+lS6vPaOnPmjFasWKHCwkJTFhsbq4kTJyoyMrJexip3VGje5iVac2q7KfN1G7q1oFI3jZqt0B4j6mU8XJ4rva4cTreWfLVfH/94pMb8ttEdNeOmbvL1aZyz7AyXW7nrTip/86kac2vHSK3YsU6q3KwA20/N7IzD2SotGaawcD78AQBoHmgMAsAVstvteuyxx5Sb+9OyKuPHj9eECRMafFyXy6X
PP/9ckjR58uQGH+9yuFxub5eAema1WmSxWGQYhtxu854cwJXi2kJD4LpCQ+C68p7KnFwdf/0NFWwzN54kKWJAf7V/+CH5t4j+3+9N0/r+1Oe1ZRiGUlJStHbtGrnd5t/J+/btpxEjRsjHx6defmc/np+mf657VZkV5gZkXKVTM13h6nnXU/KNiONvBA+ry3WVnl2ifyzdrqPp5u9rRIi/fnl3X/XrEiupcf7t58gvV9ZXh1SZWWrKrIE+yojy1eaN78nPeko6p2dqs5TJ7ayUyxXowWqvDB/GBQDUB4thGE3rN2cAaATcbreeeOKJqgZdmzZttHz58kY1e88b8vPL5HSalxBC0xUVFSybzSqXy628PPMf2MCV4tpCQ+C6QkPguvI8wzBUuGa1cj54X+7yclNuDQ5W7L3TFDp4aJOdJSjV37VVUVGh9et/1KlTJ02Zn5+/hg0brTZt2tWh0v9jGIZ+TFurj4+skKuGRmxSYbkmxw9TaNKdstj4LLo3XMl1ZRiG1u7O0NLvDqvSYf57rleHaM2+uZvCgv3qu9x6YRiG7EfzVb4pXXLW0LCMDdKaMydUnLdKNkuZKY5K6Krx9zza6P974uNjU2RkkLfLAABcBfgtDQAuk2EYeuqpp6qagomJiVqwYEGzbwoCAAAAdWXPzlLmogUqP7C/xjxk4CDF3nuffPjdW5KUmXlGa9asUlmZuQEUExOnkSPHKSSkfvaBK3WUacmepdqdb9670N/l1u0FTg0eMkc+rXvXy3jwjLIKhxZ/c1Cb92eZMh+bRXdce43GD2wlayNtmrntLpVvOCXHiQJzaLWopGWwvt+xSn7GXtks5mZ26y5JSrruzkbfFAQAoD7RGASAy2AYhv70pz9p2bJlkqT4+HgtWrRIrVq18nJlAAAAQNNluN0q+H6lcj5aLsNuN+W28HDFTpuh0P4DvFBd4+N2u7Vnzw7t3LlNNS0E1bNnX/XtO1BWa/0sO3ik4LgW7FqkAqd5tlXrCofuM2KUeNOjsgbXz/6F8Iwjpwr1+md7lVtUYcrio4L0yKQeahMX6oXKaseZWaLSNSdllDpMmSXUT3uMUh3f/on8rTnVlg6VJKvNX4NvmKo2Xfp5qFoAABoPGoMAUEuGYejpp5/We++9J0mKi4vT4sWL1aZNGy9XBgAAADRdladPK3PRW6o4eqTGPGzYCMXcfa9swcEerqxxKi8v09q1PygjI92UBQQEasSIMUpMrJ8PLroNt745vlJfnFhZ4w6Oo/LLNLHdeAX1u1WWempCouG53YZWbDihz9aekLuGxvKoPom6d1wn+fvZvFDdpRluQxW7MlW5K7PGrUVdrUK16mCKXOXr5Ws1Nw3DY9pq5KQHFBwW5YFqAQBofGgMAkAtnG0Kvvvuu5Kk2NhYLV68WG3btvVyZQAAAEDTZDidyv/2a+V+9okMp9OU+0RFKW7G/QruydKUZ50+fUpr1/6gigrz3ovx8S01cuQYBQbWzx5khZVFWrhrsQ4Vm/cuDHa6dXeRoT4j/kM+8Z3rZTx4Rm5hhd78fK8OnSo0ZUH+Prr/xq4a2DXWC5XVjrvErtI1qXJlmWevWvxsOhPtp00pn8nfekxW0+qgFnUddJ16Db9BVmvjbHoCAOAJNAYB4BLObwrGxMRo8eLFateunXcLAwAAAJqoipOpylz4lipPptaYh48Zq5jb75Q1INDDlTVObrdbO3du1e7dO0yZxWJRnz4D1LNn33pbOnRv7kEt3v22StyVpqxjmV1TfdsobuJDsgTUz/6F8IytB7K08KsDKqs0N+I7twrXnFt6KDo8wAuV1Y79eL7KNpySHG5TZmkRpI15p5Wz61v5W4tNuW9AmEbcer9iW13jiVIBAGjUaAwCwCU888wzpqZg+/btvVwVAAAA0PS4HQ7lrfhMeV9/Kblcptw3Nk5xMx9QUJeuXqiucSotLVFy8iplZ2easqCgYI0cOVZxcQn1MpbT7dTnR77UylNrTZnFMDQ
+v1w3dLlV/j0nyGIxTcdCI1Vpd+ndVYeVvPO0KbNaLLp1RDtNHNpOVvMUu0bBcLhUvild9qP55tAiVbQJ03c7VsvXtUM+VnPTML5dLw29aZr8AupnNi0AAE0djUEAuIg///nPWrp0qaT/awp26NDBy1UBAAAATU/5saPKXDhf9tPm5oQsFkVOuF7RkybL6u/v+eIaqbS0E1q3brXsdvPMvZYt22j48GsVEFA/M7xyyvP01s4FSi0zNyDDHS7dW2pT92t/K1sLtlNoSk5mFuv1z/YqI9e89GZ0WIAevrWHrmkV7oXKaseZU6ay5FS5i+2mzBLiq8N+Tu3f+q78rGek8/qaFquP+o+9Qx17DaWRDQDAOWgMAsAFzJ07V2+//bakn5bnmTFjho4dO6Zjx45d9Hndu3dXYmKiJ0oEAAAAGj13ZaVyP/lI+Su/lQzDlPsltlTc/bMVyAfwqrhcLm3fvkn79+8xZVarVf37J6lbt1711uzYlrlTS/e9rwrDvMRkt5JK3RPSVdET75fFj6VdmwrDMPTtljQt//GInC7zv7ukbrGacX0XBQX4eqG6SzMMQ5V7s1WxPUMyly+jZahWHdsne3Gy/KwVpjw4PF6jJs9WWFScB6oFAKBpoTEIABewffv2qtuGYeiFF16o1fOeffZZTZkypaHKAgAAAJqMsgP7lblogRzZWebQZlPUTRMVddNEWX0bZ3PCG4qLi5ScvFK5uTmmLCQkVKNGjVOLFrH1Mpbd5dDygx9p3ZltpsxmGLopr0Jje98tvy4j6mU8eEZBcaXmvrtd2w+a/935+9o0bUJnDe8V32hn0bnLHCpbc1LOMyXm0NeqvPggrU35Sn46qJpWP+3Qe5T6XztJNh/+uwIAQE1oDAIAAAAAgHrlKi9XzvJlKlz9Q425f5u2in9gtvxbt/FwZY3b8eNHtXFjshwOhylr27aDhg4dJT8/v3oZK6M0U/N3LFBGZZ4pa2F3ampFoDpN+LWsEfWzfyE8I+Vglv65bIcKis3Lz7aNC9XDk3ooPqrx7rXnOFmosvVpMirNe5BaogK1rSxPGduXy99q3m/Q5hukYRNnKLF9d0+UCgBAk0VjEAAuYMmSJd4uAQAAAGhySnfvUuaShXLmmRtOFh8fRU+arMjrbpDFZvNCdY2T0+nUli3rdfjwAVNmtdqUlDRUnTp1q5cZXoZhaP3pzfrg4MdyyG3K+xVV6I4W/RU+fqosPvXThETDczjd+ij5qL7ZnFZjfkNSG00Z3UE+NquHK6sdw+lW+dbTsh/MrTF3tAvXd7vWy2rfKh+recnb6MTOGnHLDAUEhzV0qQAANHk0BgEAAAAAQJ25SkqU9f5SFW9YX2Me0PEaxT8wW37xzEA7V25urr788nMVFJhnQIWHR2jUqHGKjIyul7HKnRVauvc9bc/dZ8p83YZuy7dr+KCZ8m03oF7Gg2dk5Jbq9c/26mSmeenNsGA/PTixm3q2r59rqCG48spVmpwqd6F5lqMlyFcnwyxK2fKB/K1pkqk3blWvEbeo26AxslgaZ9MTAIDGhsYgAAAAAACok7ID+5Xx5mtyFRaaMoufn1pMuVMRY8fJYuWN+7MMw9CePXu0atUqOZ3mGVAdO3ZWUtJw+dbT/oupRWmav3Ohch3Fpiy+0qHpzki1ufHnsoY03gYSqjMMQ2t2ZWjpykOyO8yzP3t3jNasm7opLLhxzvw0DEP2Azkq35ohuQ1TbkkM0epTx1RycpX8rWWmPCA4Sv8/e/cdHdWVpgv/OedUVClnCZDISCJHESQwORpjHMDZxjZuz3hyunfud3vSutN3rZm+M2t193TbOBBsnO02wTbZIAmRg1AESSjnrFLlOuf7w22P7V02QSVVCT2/tVgL9Jw6+5V0JKrOW3vv7E3PIzph1GCUS0REdM9gY5CIiIiIiIiI7oqmqug89AXaPvkI0MQb+yHpGUh4+jno4+ICUF3wcrtdOHToFEpLxaVDdTodMjOzMG7cRL+
MpWkajtWcxL6KL+CF+D1a0GXHppGLYZmzGZLM5V2HCqvdjV1fluJiWauQ6RQZz6xPx8L0eL8sPzsQVLsbtrxaeOrFRjV0MqyjQnHi4lHo1UIoknjdjpo4F3NXPQq9wTgI1RIREd1b2BgkIiIiIiIiojvm7etD01uvo+/KZSGTzWbEPbIV4dmLg7YxESitrS3IzT2B3l5xdmVUVDQWL16BiIhIv4xldfVh97W3UdRdIWQmr4pHuryYs/Bl6EZk+GU8GhxlNZ14bX8xOnvFpTdHxofib5+ag5SEMHR09AWgultz1/fAllsLzSHOlJWjTCjw9qHq3NswyK3C0qGyYsS81Y8hNW3WIFVLRER072FjkIiIiIiIiIjuiKOmGo3/9Wu428TZSiGTpyDh2eehj4oKQGXBS1VVFBRcwrVrl6H5mF05aVIG5syZD0Xxz62aG50VeKtgN7q9diFLsbvxpJSIpA0vQzaH+2U8Gnger4p9eTdx8HS1j7mfwH0zR+Dlh6YhxGyA1ysuLRpomleF41ITnMXi7w0AUEdH4FDRJaj209DLLiEPj0nB4ge3wRIePdClEhER3dPYGCQiIiIiIiKi26JpGnpyTqFl7x5oP9wXT5IQ88CDiF63gXsJ/kBPTzdyc4+jzUcj1WAwYsGCbKSmjvXLWKqm4vPKw/iy+rjQPJI0DUu67Ngwbg1M09dBkvh9Gipauux4bV8RKht6hMxi0uG5demYNTEORkNw3urzdjtgO1UDb4fYqJZMOjTGGnDu/O9hlCshC5OMJUyasxLTstZC5nK3RERE/RaczxaIiIiIiIiIKKioTida3tmDntO5QqaEhiFx+89gyZgcgMqCl6ZpuHGjBBcunIHnh41UAElJSVizZi28Xv/cnul0dGFnwS6UW+uFLNTjxZZeCdOz/wJK/Di/jEeDI7+wCXsOl8Hh8gpZemoUXtiQgaiw4NxrT9M0uG50wH6+AfCIsxjlRAtOtzag/fJhGGWx6ak3hiNr47OIHzV+MMolIiIaFtgYJCIiIiIiIqKf5GpuQsNvfwNXXa2QmcaNR9JLfwR9NJf3+y673Yb8/FOoq6sRMkmSMH/+fMyfPx+aBr/sBXetrRh7CveiTxWXYJxgc+Ex01jEb3gektHS77FocNidHuw5XIYzRc1CpsgSNi8ei9WZKZCDdB9P1emBPb8O7mpxP03IEuyjI3Ds4lfQeS9DJ4tNw4TUqVi4/gkYTCGDUC0REdHwwcYgEREREREREf2o3osX0LzzDah2cQnAyBUrEffwFkg63l74rtraKuTnn4LD4RCy8PAIZGUtxcSJYyDLcr/3gvOoHvz++n6caMgXMlnTsKrTjpUZD8GYfh+kIG0gkai8vhuv7StCW7d4DcVHmfHSxskYkxS8+0N6mqzoy6mBZnMLmRxhRJnBjdKze2GQG4EfXJaSpMOspQ9h3PSFvGaJiIgGAJ+5ExEREREREZFA83jQ9slH6Dz8pZBJRhMSn9uGsDnzAlBZ8HK73Th/Ph/l5aU+84kTMzB7dib0er1fxmuxteHNq2+h1i7uXRjp9uIJmwGTlv0PKNEj/TIeDTxV1XAwvwqf5VZB1X64SySQNTUJj6+cAFOQ7iWoqRocV5vgLGjxfcDoCBy5XgxXz1cwyGLTMyQiEUsefB7h0QkDXCkREdHwFZzPIoiIiIiIiIgoYDxdnWh89bew37guZIbkEUj+o1dgSEwKQGXBq7W1Gbm5J9DbK+6TZjKZsXDhEowcmeK38c43Xca7JR/AqYn7zk22OrA1Yiqilj8NSRece8+RqL3bgR37i3C9Tlx602zU4enVk5CZEbwNM2+vE7acGnhbbUImGRS0jwhB7oXPYdBKIfuYCDhmajZmL90EReefxjkRERH5xsYgEREREREREX3LVlqCxld/C6+PBlfY/AVIeOpZyEY2m76hqioKCi7h2rXL0HzM8Bo1ajQWLMiGyWT2y3hOrwsflnyE/JYrQqZTNWzodGDJjMd
hmLDAL+PR4LhQ2oKdX5TC5vQI2fiREdi+IQOxkf65hgaCq7ITtjN1gFtcGleOt+C8tQ1N5z6EUe4Ulg5V9CFYuP5JJI+dMkjVEhERDW9sDBIRERERERERNFVF55efo+3Tj4EfNLgknQ5xWx9HxJKl3PPrO7q7u5CbewLt7eJSnjqdHnPnLsD48ZP89jWrtzbijStvodnVJWRxLg+edIZh7Oq/gRwe75fxaOA5XV7sPXodOQWNQiZJwMZFY7BhYSoUWQ5Adbemubywna2Hu7JTDCXAPS4Khy+dhuI6B50sNj2jEscj+4FnYbYE736JRERE9xo2BomIiIiIiIiGOW9fH5reeA19BVeFTBcTg+SXX4Fp9JgAVBacNE3D9esluHAhH16vuJRnXFwCsrKWIizMP80OTdOQU5+Pj6/vgwfijKzZPXY8lDAf4fO2QFJ4q2eoqG7qxe/2FaG5Q1x6MybchO0bMzBhZOTgF3abPK19sJ2qgWp1CZkcZkBVmIQr+R/AKNcIswQBGVMXbUD6vGWQpOBsehIREd2r+GyRiIiIiIiIaBhzVFeh8be/gbtNnPVmmToNic9vhxIaGoDKgpPdbsPp0ydRX18rZJIkYfr02ZgyZQZkP83wsrnteKfoXVzpKBUyg6riwS4PFs57AbqU6X4Zjwaeqmk4fK4WH5+sgFcVl5+dlx6Pp1dPQogpOPfa01QNzsIWOK40AWL5kFIi8FX1DVhvHodR7hNyY0g0Fj+4DdEJ/ttzk4iIiG4fG4NEREREREREw5Cmaeg+dRKt774NzfODJf4kCTEPPIjodRsgBekShoFQU1OF/PxTcDodQhYeHoGsrGWIjY3z23g3u2vw5tW30OERmysjHG48ocZi1No/gmyJ8tuYNLC6rE68caAYRVXi0ptGg4InV07EwimJQbtkr9rnQl9ODbzN4jUJvQxragROnDsMvXoNiiR2DUdOmIN5q7dAb+A+pURERIHCxiARERERERHRMKM6nWh5ezd68vOETAkLQ9L2lxGSnhGAyoKT2+3C+fP5KC8v85lPmpSB2bPnQ6fzz20WVVNxtPor7K88BNXHlKxFXTY8kLocITMfYON2CLlyow1vfl4Cq90tZKMTw/DSA5OREBUSgMpuj6u6C/bTddBc4vK5cmwIrnmsuJm/Cwa5VVg6VFIMmLfqMYxOnz1I1RIREdGPYWOQiIiIiIiIaBhxNTWh4be/hqu+TshM48Yj6Wd/DH0UZ6B9o6WlCbm5J2C19gqZ2WzGwoX3YcSIUX4br9dlxa6C3SjpqRKyEK+KR7pVzFr0J9AlTfLbmDSwXG4vPjhRjuOX6oVMArB2fio2ZY+BTgnOJq/m9sJ+vgGuGx1iKAHquCgcKrgA1XYaelncbzAsZhSWbNoGS0TMIFRLREREt8LGIBEREREREdEw0XvxPJrfegOqQ1wKM3LlasQ99AgkP816G+pUVcXVqxdRWHgFmibO2ktJGY358xfDZDL5bczSjhvYdW0Perzi92e03YUnlFFI2vASJBP3fBwq6lqseHVfEerbxKU3o8KMeGFDBtJTg7cR7+mww3aqGmq3U8gkix5NsQaczf8URrkCsrD6qYRJc1ZgWtY6yLIyKPUSERHRrfHZPhEREREREdE9TvN40Prxh+g6ckjIZJMJCc8+j7A5cwNQWXDq7u5Cbu5xtLe3CZler8fcuQsxbtxEv+0D51W9OFjxJQ7XnhQWDpU0DUu7HFg3cQNMU1YF7d5z9H2apuH4pXq8f7wcHq8q5DMnxOK5dekINesDUN2taZoGZ3EbHJcaAVVsjMsjw5HXVIuOC4dhlHuEXG8MQ9bGZxE/asJglEtERER3gI1BIiIiIiIionuYu7MTja/+FxzlN4TMMGIkkl9+BYbExABUFnw0TUNZWTEuXjwDr1fcRy0uLgFZWUsRFhbutzHbbB34z/P/hcq+RiEL83jxWK+CKUv+CkrsaL+NSQOrx+bCWwdLcLWiXcgMOhlbV0z
AkunJQdvkVe1u2HJr4WkQl8+FToZ9bCSOnTsBnecSdLLY9IxPmYyF65+E0WwZhGqJiIjoTrExSERERERERHSPspUUo/G138HbK87oCVuwEAlPPgPZaAxAZcHHZrPh9OmTaGioFTJZljF9+hxMnjwNsuy/feDO1V3Gf+W/BZvqFrJJfU5stUxE3P3bIBnMfhuTBlbRzQ68fqAY3X3iXnuj4kPx0sbJSI4N3oaZu64HtrxaaA6PkMnRZpTpXSjNexsGufHrDRK/Q5J0mLl0M8ZPXxS0TU8iIiJiY5CIiIiIiIjonqOpKjq+OIj2338C/GB/PEmnQ9xjTyJi8RLevP+D6uqbOHPmFJxOcR+1iIhIZGUtQ0xMrN/Gc3vdeOvCXhyuyhMyWdOwptOBFVMfhWFiFr9HQ4THq+KTk5X48lyNz3zlnFF4+L6x0OuCc689zavCfrERrhJx+VwAwPgoHC65Bnf3SRhkcQ/MkPAELH5wGyJikga4UiIiIuovNgaJiIiIiIiI7iFeqxVNb+5AX8FVIdPFxiL5Z6/ANHr04BcWhFwuF86fP42Kius+87S0KZg1ax50Ov/dPmmwNmFnwS7UO8RlJqPdXjxuM2Hiir+AEpnstzFpYDW29+HVfUWoabYKWXiIHtvWZ2DauJgAVHZ7vF0O9J2qhtopNvwksw4dIyzIOXsQBq0Eso8+9ZgpWZi97EEouuDcL5GIiIi+j41BIiIiIiIionuEo+omGn73G3jaxFk/lmnTkbjtRSihoQGoLPg0NzchL+8ErFZxHzWzOQSLFt2H5OSRfhtP1VQcrzmFfRVfwAtNyKf1OvBo9CxELn8cks7gt3Fp4GiahpyCRuw9eh0ut7jX3tSxMdi2Ph0RluD8fmqaBtf1DtjP1wNe8ZpURoThfFcrGvM/hFHuEJYOVXRmLFj/FEaMmzJIFRMREZE/sDFIRERERERENMRpmobukyfQ+t5eaJ4f7A0mSYh98CFErVkHyY/74w1VXq8XV69eRGHhFZ95auoYZGZmw2Qy+W3Mdnsndl/bg3JrnZDpVQ33dzqRPfspGMbO9duYNLCsdjd2fVmKi2WtQqZTJDyydDxWzB4ZtEvBqg4P7Kdr4a4V9x+FIsEzMRqfn8uF7DwHnSzuNxiVOA7ZG5+FOTRiEKolIiIif2JjkIiIiIiIiGgIU51ONO/Zid4z+UKmhIUjafvPEJKeEYDKgk9XVydyc4+jo0NcxlOv12PevEUYO3aC35o5mqbhTOMFfFj2CZyaV8gTnB48o0Vh1JqXIYf5bw9DGlhlNZ14bX8xOnvFPSmTYkLw0sbJSEkIC0Blt8fd2AtbTg00u9jwkyNNqAqTcPXU+zDI1cIsQUDGlIXrkZG5HJLENxoQERENRWwMEhEREREREQ1RrqZGNPzXr+FqqBcy84SJSHrpZegiowJQWXDRNA2lpUW4dOksvF6xQRcfn4isrKUIDfVfM6fXZcXeovdQ0CnuXyhpGrK77Xhk0irELHwEnd1ig4mCj8erYl9eFQ6ervKxGCxw38wR2LJsPIx6ZdBrux2aqsFxuQnOwhafuTwuCl9VXIe1/BgMcp+QG0OisHjT84hOTBnoUomIiGgAsTFIRERERERENAT1XjiHprfehOZ0CFnUqjWI3fwwJB1f9ttsfcjLO4nGRnEZT1mWMWPGHGRkTIPsx2VWr7UV453Cd9Grig2/SLcXW/oULFj39wgZNQlerwqAjcFg19Jlx459RahoEJfetJh0eG5dOmZNjAtAZbfH2+OE7VQ1vO12IZOMCnrGROCr04egV69BkcS254jxs5G5Zgv0Bv8tsUtERESBwVcIREREREREREOI5vGg9aP30XX0iJDJZjMSnn0eYbPnBKCy4FNdXYn8/By4XD4adJFRyMpaiuho/y3h6fA48HHZpzjdfNlnPrvHjs0xsxCx7HEYE6L9Ni4NrPyiJuw5VAaHS5xtmpYSiRfvn4yoMGMAKrs1TdPgruiE7Ww94FG
FXEkMxVVXD6pydsIgtwpLh0qKAfNWbsXoDP5OISIiulewMUhEREREREQ0RLg7OtD46n/BUVEuZIaRo5D88h/DkJAYgMqCi8vlwrlzeaisvOEzT0+fglmz5kFR/HdbpKKrCruu7Ua72ypkFq+Kzd1ezMncDt2oqX4bkwaW3enB24fLkF/ULGSKLOHBxWOxZl4KZNk/e1L6m+bywpZfB3dVlxjKEtRJ0Th06QLUvlzoZZdwSFj0SCx+8HmERsQMfLFEREQ0aNgYJCIiIiIiIhoC+oqL0PTa7+C19gpZ+MIsxD/xFGRjcM5aGkzNzY3IzT2Bvj6xQRcSYsHChUuQnDzSb+N5VA8OVHyBo7U5PvedS+9z4tGQCYjb8CwkU6jfxqWBVVHfjVf3FaGtW1yqNz7KjJc2TsaYpPAAVHZ7HA096D14HapVbPjJ4QY0xBpxLucTGKVy+OprTpy1AtOz10NWgnO/RCIiIrp7bAwSERERERERBTFNVdFxcD/a9/0e0L7fepJ0OsQ//hTCsxdDkoJz1tJg8Xq9uHLlAoqKrvrMU1PHYv78LBiN/tsjrcHahJ0Fu1DvaBcyg6piQ6cL2TMeg2HCAr+NSQNLVTUczK/CZ7lVUDWx1Zs1NQmPr5wAkyE4b6lpqob2szXoyK+Br061MiYSufW16Dj7JYyyuF+izhiGrPufQULKxEGoloiIiAIhOJ/FEBERERERERG8VisaX38NtsICIdPHxiHp5T+GKXX04BcWZLq6OpCTcwKdnWKDTq/XIzMzC2PGjPdb81TVVByvOYl9FV/C66P7kmp3YauciJHrXoJsifLLmDTwOnoceG1/Ma7XdgmZ2ajD06snITMjYfALu02q1YXGI5VwNIiziiWDAvv4KBzNPw6d+yJ0srjfYNyoDCza8BSMZstglEtEREQBwsYgERERERERURBy3KxEw29/A0+H2OyyzJiJxOdegGIZ3jfwNU1DaWkhLl48B1X1CnlCQhIWLboPoaFhfhuz3d6J3dd2o9xaL2SKpmFlpwMr0zbCOHnFsJ/FOZRcKG3Bzi9KYXN6hGz8yAhs35CB2EhzACq7Pa6bnbDl1wFuseGnxFtQqrhQenIPDHID8MPLUlIw877NmDAji9csERHRMMDGIBEREREREVEQ0TQN3V+dQOv7e6F5ftCkkCTEbn4YUavXQpLlwBQYJGy2PuTlfYXGRrFBJ8syZs6ci/T0qZD99HXSNA1nGi/gw7JP4NR8NCGdHjzmCsO4FX8JOTLJL2PSwHO6vHj32HWcutooZJIEbFw0BhsWpkIJ0p83zeWF7Vw93BWdYigB0qQYHCosgLvrJAyyXTjEHBaPJQ8+j4hYXrNERETDBRuDREREREREREFCdTrRvHsnes/mC5kSHo6k7S8jJC09AJUFl6qqCpw5kwuXyylkkZFRyM5ehqioGL+N1+uyYm/ReyjovC5kkqYhu9uB9anLYJm5EZKs+G1cGljVTb343b4iNHfYhCwm3ITtGzMwYWTk4Bd2mzwtfbDl1EC1uoRMF25ES1II8vL2Q6+VQPYxEXB0xiLMWbEZik4/CNUSERFRsGBjkIiIiIiIiCgIuBob0PDbX8PV0CBk5gkTkfTSH0EXGTn4hQURl8uJc+fyUFlZ7jPPyJiKmTPnQlH8d7ujoLUYe4veRa/qownp9mJLnx6Ts/8SStwYv41JA0vVNBw+V4uPT1bAq4p7RM5Lj8fTqychxBScDTNN1eC81gzH1Wb42OISlkmxONPWhJrc92CQO4SlQ2WdGQvXPYkR46cOTsFEREQUVNgYJCIiIiIiIgqw3vPn0LTzTWhOh5BFrV6L2M0PQ1KG90y0pqYG5OaegM3WJ2QhIRYsWnQfkpJG+G08h8eBj0s/xemWyz7z2T12bI6bi4jlWyHpDH4blwZWl9WJNw4Uo6hKXHrTaFDw5MqJWDglMWj32vNaXbDlVMPbIs5yhF6GflYSPjlxAqotHzpZ3C8xMmEsFj/wHMy
hEYNQLREREQUjNgaJiIiIiIiIAkTzeND64fvoOnZEyGSzGYnbXkDozNkBqCx4eL1eXL58HsXFBT7z0aPHITMzC0aj0W9jVnRVYVfBbrR7rEJm8arY3K1hzoKXoRuR4bcxaeBdKW/DmwdLYLW7hWx0YhheemAyEqJCAlDZ7XFVdsJ2pg5wq0KmxIWg0gJcPbALRrnax9KhMiYvWIvJ81dCkoJzv0QiIiIaHGwMEhEREREREQWAu6Mdjb/7LzgqK4TMOGoUkn72CgwJCQGoLHh0dnYgN/c4Ojs7hEyvNyAzMwtjx47323ge1YODFV/gSG2OrxUakdbnxBbLRMTd/ywko8Vv49LAcrm9+PBEBY5dqhMyCcDa+anYlD0GOiU4G2aaywvbmTq4b3aJoQTIaTE4VlwKW/sxGGVxRq3BHIXFm7YhJil14IslIiKioMfGIBEREREREdEg6ysqRNOOV+G19gpZ+KJsxD/xFGTD8F2eUtM0lJRcw6VL56Cq4uyoxMRkLFx4H0JDQ/02ZoO1CTsLdqHe0S5kBlXFhi43smc+AcO4eX4bkwZeXasVr+4rQn2r2DCLCjPihQ0ZSE+NCkBlt8fT0oe+U9XQ+sRZjnKoAe2jLMjJ/QIGrRCKJLazk8fNwvy1W6E3mAajXCIiIhoC2BgkIiIiIiIiGiSaqqLj4H607/s9oH3/Jr6k1yP+iacQkbU4MMUFib4+K/LyvkJTU4OQybKMWbPmIT19qt/2gFM1FcerT2Jf5Zfw+pgnmGp3YaucjJHrtkMOifTLmDTwNE3D8Uv1eP94OTxesbk8c0IsnluXjlCzPgDV3ZqmanAUNMNZ0Axf01eV0ZE429aCplMfwii3fT318TtkxYA5Kx7FmMlsZBMREdH3sTFIRERERERENAi8vb1ofP1V2IoKhUwfF4ekl1+BKWV4L/V382Y5zp7NhcvlErLIyGhkZy9DVFS038Zrt3did8FulPfVC5miaVjZ5cDKtAdhzFjqt0YkDbwemwtvHSzB1Qofsz91MraumIAl05OD9nvq7XHCllsDb6tNDPUyXBNjcCQ/B7LrHPSyOJMwMi4F6558GV6Jy90SERGRiI1BIiIiIiIiogFmr6xE4+9+DU+HuFeeZcZMJG57AUrI8L2J73Q6cfZsLqqqxP0WASAjYxpmzpwLRVH8Mp6maTjTeAEflX0Ch+YV8gSnB495IjBuxV9Djhje+zwONUU3O/D6gWJ094nN5VHxoXhp42Qkxwbnz5qmaXBXdsJ2ph7wiLMclXgLShUPSr56F0a5WpglCEiYnrUWC9c8CEBGR4e4fCoRERERG4NEREREREREA0TTNHSfOIaW998FvD9oQMkyYh98GFFr1gbtzKXB0NhYj7y8r2CziU2MkBALsrKWIjEx2W/j9bqs2Fv4Lgq6bgiZpGnI6nZgw+gVsMzYAEn2TyOSBp7Hq+KTk5X48lyNz3zlnFF4+L6x0OuC83uqOj2wn6mHu6pLDCVAmxSDL68Vwd39FYyy+LNiMEUg64FnMWnqNCiKDK+P5VOJiIiIADYGiYiIiIiIiAaE6nCgefdO9J47I2RKRASStr+MkElpAagsOHi9Hly+fB7Fxdd85mPGjEdm5iIYDEa/jVnQWoy9Re+iV3UKWaTbiy02AyYv/msoscN7SdehprG9D6/tK0Z1c6+QhYfosW19BqaNiwlAZbfH02RFX24NtD5xWVA5zIDGeDPO5H4OI4qgSOKGg0ljZ2D+mq0wmEIGo1wiIiIa4tgYJCIiIiIiIvIzZ0MDGn/7a7gaG4TMPHESkra/DF1k5OAXFiQ6O9uRk3MCXV3i0qoGgwGZmdkYM2ac38ZzeJz4uPQTnG657DOf3ePA5oRMRCx/BJLO4LdxaWBpmoacgkbsPXodLrc4Q27q2BhsW5+OCEtwfk81VYPjShOc11p85vLoCOQ2NKHjzIcwyW1irhgwZ8WjGJ0xd1jPOiYiIqI7w8YgERERERERkR/1nDuD5l1
vQXOKs9Ki1qxD7IMPQfLTXnlDjaZpKC4uwOXL56GqYiMnMXEEFi1aAosl1G9jVnRVYVfBbrR7rEJm8arY3KNhzoKXoUtO99uYNPD6HG7s+qIUF8pahUynSHhk6XismD0yaBtm3h4nbKeq4W23C5lkUGAdG4HjZ/Kg85yHXhZnEobHpiB747MIjYwdjHKJiIjoHsLGIBEREREREZEfqG43Wj94D90njgmZbDYjcduLCJ05KwCVBQer1Yq8vBNobm4UMllWMGvWPKSnT/FbI8ejenCw/AscqcuBuPgikNbnxKOh6Yi//xlIBrNfxqTBUVbTiR0HitHRIzbfk2JC8NLGyUhJCAtAZbemaRpc5R2wn2sAPGJzXE6woNDjQPmp92FUagDhx0FC2rxVmLpwDWTugUlERER3gY1BIiIiIiIion5yt7ej8Xe/geNmpZAZR6Ug6eVXYIiPD0BlgadpGm7eLMfZs3lwu11CHhUVg6yspYiKivbbmA3WJuws2IV6R7uQGVQV93d5kDXrKRjGzvHbmDTwPF4V+/KqcDC/CpqPbu99M0dgy7LxMOqDs2GmOj2w59fBXd0thrIE7/gofHn1GtS+HBiVPuEQY0gkFt3/LOJGjB2EaomIiOhexcYgERERERERUT/0FV5D4+uvQrWKS1WGZy9G/GNPQjYE5x5nA83pdOLs2RxUVYkNUwCYPHk6ZsyYA8VPS6uqmorj1Sexr/JLeH3ME0y1u7BVGYmR616EHBLhlzFpcLR02bFjXxEqGnqEzGLS4bl16Zg1MS4Ald0ed6MVttwaaDZxWVA53ICaSAMu5B6ESS6BIonX7ojxszBv9RYYjJzdSkRERP3DxiARERERERHRXdBUFe37P0PHgX344fQlSa9H/JNPI2JRdoCqC7zGxnrk5X0Fm02c+WSxhGLRovuQmJjst/Ha7Z3YXbAb5X31QqZoGlZ2ObEq4yEY0hYH7b5z5Ft+URP2HCqDw+UVsrSUSLx4/2REhRkDUNmtaV4VjitNcBaKeyECgDQ6Al9V1aGn4hjMijjDVVYMmLtyC0ZnzB3oUomIiGiYYGOQiIiIiIiI6A55e3vR+PqrsBUVCpk+PgHJL/8xjKNSAlBZ4Hm9Hly6dA4lJeLXBgDGjp2AefMWweCnWZSapuFM4wV8VPYJHJrYOEpwevCYNxLjVv4t5PDgnVFGIrvTg7cPlyG/qFnIFFnCg4vHYs28FMhycDZ6vd0O2HJq4G23C5lkVNA1Khxfnc2FQbsIvSzOJIyMS8Wijc8iNCJmMMolIiKiYYKNQSIiIiIiIqI7YK8oR+Pv/guezg4hC505GwnPPQ8lJCQAlQVeR0c7cnOPo6urU8gMBiPmz8/G6NH+2x+t12XF3sJ3UdB1Q8gkTUNWtwMbxqyCZfp6SLLst3Fp4JXXdeO1/UVo63YIWXyUGS9tnIwxSeEBqOzWNE2D60YH7OcbAI8q5HKCBRdtfag9/QFMSg0g9DUlZGSuxuQFqyHLwblfIhEREQ1dbAwSERERERER3QZN09B1/ChaP3gP8P5gZposI/ahRxC1as2wXKZS0zQUFRXgypXzUFWxEZKUNAKLFt2HkBCL38YsaC3G3qJ30as6hSzS7cUWuwmTl/wplJhRfhuTBp5XVXHgdDX251VB1cS99rKmJuHxlRNgMgTnLS3V4YE9vw7umm4xlCW4xkTi8JWrkJ15MCo24RCTJQqL7n8Gscn+a6ATERERfVdwPosiIiIiIiIiCiJeux1Nr/0WvefPCZkSEYmkl15GyMRJAags8Ox2G3JzT6Cx0cfefoqCWbMykZY22W8NU4fHiY9LP8bplis+89k9DjyUuADhKx6GpOj9MiYNjrYuO17bX4zyerGpZjbq8PTqScjMSAhAZbfH3dALW24NNLtHyOQII8rNEq6e/hxmpQSSJDY9R02cg7krH4HeaB6McomIiGiYYmOQiIiIiIiI6CfYampQ8n//HY56sfFlnpSGpO0/gy4icvA
LCwINDXXIzT0Bh0PcQy0qKgbZ2csQGRnlt/Equqqwq2AX2j19QmbxqtjcK2HOwlegS5zotzFpcOQXNeHtw2WwO8V9IieMjMCL92cgNiI4G2aaV4XjchOcRa2+89QIHKmohr3zK4To2oVc1hkxb9VWpKbNHuhSiYiIiNgYJCIiIiIiIvoxbadyUPnbV6E6xeUqo9dtQMwDD0JSht8eYKqq4vLl8ygquuoznzJlBqZPnw3FT18bj+rBwfLPcaQuF+I8KyCtz4kt4ZMRt+EpSIbgbB6RbzaHB28fKcOZomYhkyUJD2SNxvoFoyHLwblEr7fLAVtONbwd4l6IklFBW5IFp87nwSRdhF4WZxJGJYzGog3PwBIRMxjlEhEREbExSERERERERPRDqtOJlvfeQU/OKSGTQ0KQuO1FhM6YGYDKAq+3twc5OcfR1tYiZGZzCLKzlyExMdlv4zVYm7Dz6k7UOzuEzKCquL/bi+zZz0A/epbfxqTBUV7Xjdf2F6GtW2yqxUWasP3+yRg3IiIAld2apmlwXW+H/XwD4BXb1VKCBWe7utF07kuEKDU+ziBh8oK1yMhcCVkefm8uICIiosBhY5CIiIiIiIjoO5y1tWh89b/gamoUMmNKKpJe/mMY4uIDUFngVVVVIj//FNxul5CNHJmChQvvg8lk8stYqqbiePVJ7K/8Eh4f8wRT7S48pk/FiHXPQzaH+2VMGhxeVcX+vCrsP10FzccU0EVTEvH4yokwG4PztpXq8MCWVwtPXY8YyhJsqeE4erkAOs9pGBWbcIjJEoVF9z+L2OQxg1AtERER0fcF5zMsIiIiIiIiokGmaRq6TxxD6wfvQfOIS/5FLF6CuMeegKw3BKC6wPJ4PLhwIR/Xr5cImSzLmD07E2lpUyBJ/lnusd3eid0Fu1HeJ+7rqGgaVna5sGrKIzBMXOS3MWlwtHbZsWN/Mcrru4XMbNThmTWTMC89IQCV3R53fS9seTXQ7OLvCCnCiFJFQ/GZL2BWSiFJYtczJW0O5ix/BHojl7wlIiKiwGBjkIiIiIiIiIY9r9WKpp1voO/KZSGTTSaM2f4ClGlzAlBZ4HV1deDUqWPo6uoUsrCwcCxevBwxMXF+GUvTNJxpPI+PSj+FA14hT3B68JgajXGrfgY5LNYvY9LgyS9qwtuHy2B3it/biSMj8ML9GYiNCM6GmeZV4bjYCGdJm8/cmxKOo2U34bbmIETXLuSKzoh5q7YiJW32QJdKRERE9JPYGCQiIiIiIqJhzVZWiqbXX4WnU2x8WcaOwaS/+UsYEhLR0dEXgOoCR9M03LhRivPnT8PrFRs5Y8aMx/z5WdD7aQal1dWHvYV7cbXrhpBJmoasHgfuH7sWIdPWQJJkv4xJg8Pm8ODtw2U4U9wsZLIk4YHsMVg/PxWyHJyzP72dDvTlVEPtFPdClEw6NESbkH8hD2b5EvSyOJMwOnEMFm54Bpbw6MEol4iIiOgnsTFIREREREREw5Lm9aL9wD50HNgHXxudRa1cjQkvPAO9yQivVw1AhYHjcrmQn38K1dWVQqbT6ZCZmYWxYyf4bRnPa63FeKdoL3pVce/CSLcXWxxmTF7y51CiR/hlPBo8N+q6sGN/Mdq6xaZaXKQJ2zdOxrjkiABUdmuapsFV2g77xQbA62MzxAQLclo60HHlc1iUWjGXJExZsBbp81ZBltnMJiIiouDAxiARERERERENO+72djTu+B0c5eLsNCUsDInbXoRl6jTIen0Aqgus1tYW5OQcg9XaK2RRUTFYvHg5IiIi/TKWw+PEx6Uf43TLFZ/5rB4HHkpehIjZD0FSeAtjKPGqKvbnVWH/6SpffXcsmpKIx1dOhNkYnN9X1e6GLa8Wnnrx5wCyhN6RoTh6uQBG7QyMik04xBwWjUUbnkVM0uiBL5aIiIjoDgTnsy8iIiIiIiKiAdJ78Tyad70F1SbezA9Jn4zE51+ELjJy8AsLME3TUFRUgMuXz0Hz0cmZNGky5sz
JhOKnBl1FVxV2FexCu0dcotXiVbG5V8bcRX8KJWG8X8ajwdPaZcdr+4tQUd8jZGajDs+smYR56QkBqOz2uOt6YMurheYQlwWVIowoUD2ouHAIZrkEvibNpqbPxezlj0BvMA1CtURERER3ho1BIiIiIiIiGhZUlwut7+9F98mvxFBRELtpM6JWr4U0DJf8s9vtyMs7gYaGOiEzGIxYuHAJUlJG+2Usj+rBwfLPcaQuFz4mkiGtz4ktEVMRd99TkPRGv4xJgye/sAl7DpfB4RL3pZw4MgIv3j8ZMRHB2TDTPCrsFxvhKm3zmbtHhuFwSSU0Ry7MSoeQK3oT5q3aipRJswa6VCIiIqK7xsYgERERERER3fOc9XVofPW3cDXUC5k+Ng6J238G89hxAags8Boa6pCXdwJ2u13I4uMTkZW1DKGhof4Zy9qEnVd3ot4pNlUMqor7e1Rkz94Gfep0v4xHg8fm8ODtw2U4U9wsZLIkYVP2GKybnwpZ9s++lP7m7bSj71QN1C5xL0TJpEN1mA7nL+bColyBJIszCWOSxmLB+qdhCY8ejHKJiIiI7hobg0RERERERHTP0jQN3SdPoPX9d6G53UIeNi8T8U8+AyUkJADVBZaqqrh69QKuXbviM586dSamT58N2Q8zKFVNxfHqr7C/8hA8PuYJptpdeMw4GiPXvQDJ5J8mJA2eG3VdeG1fMdp7xKZafKQZ2zdOxtjk8ABUdmuapsFV0gb7xUZAFa9NLT4EJxrb0FuTi1BdrXgCScLUheuRNneFX35WiIiIiAYaG4NERERERER0T/JarWje/Rasly4KmWQwIP7xpxC+KAuSr03C7nFWay9yco6jtVWc3WU2hyAraymSkkb4Zax2eyd2F+xCeV+DkCmahpXdLqya8igMExYOy+/FUOZVVezPq8L+01XwsS0lFk1NxOMrJsJsDM7bT6rdDVtuLTwNvWKoSOiMD8HxgmswS2dhVHzsSRoeg4Xrn0VMUuogVEtERETkH8H5zIyIiIiIiIioH2zXy9D0+qvwdIhLVhpHpSDppZdhSEwKQGWBV11difz8U3C5XEI2YsQoLFp0H0wmc7/H0TQNZxvP48PST+GAuN9cgtODx7RYjFv1EuTQmH6PR4OrpcuOHfuLUFHfI2Rmow7PrJmEeekJAajs9rhre2DLq4HmFK9NKdKIC3Ynaq8ehkUuha9+9ejJmZi19CHoDcG5XyIRERHRj2FjkIiIiIiIiO4Zmqqi4+B+tO/7PXxNYYpcsRKxDz0KWa8f/OICzOPx4MKFM7h+vVjIZFnGrFnzkJ4+1S+z9qyuPuwtfAdXu8qFTNI0ZPU4cf/4dQiZsgqSxOUXhxJN05Bf1IS3D1+HwyU21SaOisSLGzIQExGcDTPNo8J+oQGusnafuSM5FIeLy6G482FWxDcW6AwmzFv1GEZNnDnQpRIRERENCDYGiYiIiIiI6J7g7uhA0+uvwn69TMjk0FAkPvcCQqfPGPzCgkBXVydOnTqGri6x0REaGobFi5cjNjbeL2Nday3GO0V70auKMxIj3V5scVoweelfQIlM9st4NHhsDg/2HC7D2WJxCVpFlvBA1hism58KWQ7OJWE9HXbYTlVD7XYKmWTW4YZRwtXLebAoVyDJHuGY2BHjsGDd0wgJixqMcomIiIgGBBuDRERERERENORZL19C0843oPb1CZk5LR2Jz2+HPmr43czXNA3l5WU4f/40PB6x0TF69DjMn58Ng8HQ77EcHic+LvkIp1uv+sxn9Tjw0IhsRMx+EJLM2xFDzfXaLuzYX4z2HoeQxUeasX3jZIxNDg9AZbemaRqcxW1wXGoEVHEmsRofgmM1LbD35CFUVyfkkiRjyqJ1SJuzArLMGa5EREQ0tPGZOBEREREREQ1ZqtuF1g/eR/eJY2Ioy4h54EFEr10PaRjezHe5XDhzJgdVVRVCpigK5s1bhPHjJ/ll6dDyzkrsvrYH7R6xMWvxqthsVTB30Z9DiR/
b77FocHlVFftyq3Agv8rX6rzImpqEx1ZMgNkYnLeYVJsbttwaeBqtYqhIaIkx4dS1AoQo52FUbMIhlvBYLNjwDGISUwehWiIiIqKBF5zP2oiIiIiIiIhuwdlQj8ZXfwtXvTjDRxcTg6QXfwbz+AkBqCzw2tpacerUUVitvUIWGRmNxYuXIzKy/zMo3V439t3YjxP1Z6D56C+m9TmxJWo64pY+AUln7Pd4NLhauuzYsa8IFQ09QhZi1OGZtWmYm+afJWgHgqumG/bTtdCc4l6IiDDiTE8fmgpzEaqUwld/fMzkTMxc+jD0Bl67REREdO9gY5CI/EJVVRQUFKCgoAClpaWor69HW1sb7HY7AMBsNiM2NhYjRoxAWloapk2bhmnTpnEZFiIiIiK6Y5qmoTvnJFrf2wvNJe5jFzpnLhKefhZKiCUA1QWWpmkoKbmGS5fOQVVVIZ84MQNz5syHTtf/2wHVPbXYdXUXmt09wA+aKgZVxYYeFYvnvgD9qKn9HosGl6ZpyC9qwtuHr8PhEptqE0dF4sUNGYiJMAWgulvT3F7YLzTAdV3cUxMA+hIsOFxcDoN6BmZFPEZnMGPeqq0YNXHmQJdKRERENOjYGCSiu+b1epGTk4N9+/YhNzcXvb3iu5G/q7Ky8nv/DgsLQ1ZWFu6//34sXrwYiqIMZLlEREREdA/w2vrQvHsnrBfOC5lkMCBu6+OIyF7il+UxhxqHw468vK9QX18rZAaDAQsWLEZqav+X8vSoHnxZcQiHak9CbD0CqXYXHjOPw8j12yAZh19zdqizOdzYfagM50pahEyRJTyQNQbr5qdCloPzZ8zTboPtVA3UHqcYmnUokVUUF+TBolyFJIv7bsaNHI/5a59CSNjw25OUiIiIhgc2BonojvX09ODdd9/FO++8g9bWVgBfv6P0bs7zxRdf4IsvvkBsbCyefPJJbN26FREREf4umYiIiIjuAfbyG2jc8Tt42tuFzDBiJJJeehnG5BEBqCzwGhvrkZt7Ana7uEdaXFwCsrOXITQ0rN/j1FsbsfvqLtQ5xVlWiqZhVbcbK6c9BuP4zH6PRYPvem0XduwvRnuPQ8jio8zYfv9kjE0OD0Blt6ZpGpxFrXBcbgJU8fWpJ9aMIzeb4bbnI1QnLj8sSTKmLlqPSXOWc2UbIiIiuqexMUhEt81ut+PNN9/Ezp07YbV+vXH7Nw1BSZIwduxYpKWlYdy4cUhISEBUVBTMZjM0TYPD4UBHRweam5tRWVmJ0tJSVFZWfvv41tZW/Od//id27NiB5557Ds899xxCQkIC9rkSERERUfDQVBUdnx9A+77fAz6Wx4xYuhxxj26BrDcMfnEBpqoqrl69iGvXLvvMp0yZgRkz5vS70aFqKo7ePIYDVUfhhdh0GeFwY6uShNFrtkMOiezXWDT4vKqKfblVOJBfBV/v+cyaloTHV0yAyRCct5HUPhdsubXwNFnFUCejIVyPvMICWHTnYVTswiGWiDgsXP8MohNTBqFaIiIiosAKzmd0RBR0Dhw4gH/7t39DS0vLt808i8WC++67DytXrkRmZiaiou5sqZXOzk6cPXsWR44cwcmTJ2G1WmG1WvHrX/8a77//Pv7u7/4O69evH4hPh4iIiIiGCE9XJxpffw320hIhky0WJD77PEJnzgpAZYFntVqRm3scLS1NQmYymZGVtRTJySP7PU6zrRW7r+5ClV1cWlLWNCzrdmJtxkMwTlo8LJdwHepaOm3Ysb8YFQ09QhZi1OGZtWmYmxYfgMpuj6u6C/bTddB87IWoRRiR196LtoZTCNOVwdflOXbKAsy4bzP0BuMgVEtEREQUeJJ2N+v/EdGwk5aW9u3fp02bhsceewxr166FyeSfzeadTic+//xzvPvuuygoKAAAyLKM4uJiv5z/XlJTU4Pf/va3OH36NNrb2xEZGYnMzEz80R/9EcaNGxfQ2jo7bfB4xBfkNHRFR1ugKDK8XhUdHX2BLofuIby2aCDwurr
3WK9eQdNbr0O1irOAzBMnIfGFl6CPjh7QGoL1uqqpqcLp0yfhcon7qCUnj8SiRffBbO7fChyqpuJkbS4+K/8cbh+7CSY4PdiqxmD84pcgh8X2a6zhKNDXlqZpOF3YhLePXIfTR1Nt0qhIvHh/BqLD/fOaz980txf28w1w3RCXtQWAnjgzjhRXwIQz0MmdQq43mjF35WMYNXHGAFc6uAJ9XdHA0ekUREVxZSUiIuo/zhgkots2b948/PEf/zEyM/2/X4jRaMSDDz6IBx98EGfPnsVvfvMbnD9/3u/jDHUXLlzA9u3b0dfXh5SUFCxduhT19fU4cOAAjh49ih07dmDevHmBLpOIiIioX1S3G20ffYCuY0fEUJIQs3ETotffD2kY7gPm9Xpw4cJZlJUVCZkkSZg5cx4mT57W75l77fZO7CnYjRt99eI4mobF3U6sn7AOIVNWQZKG3/dhqLM53Nh9qAznSsRZoIosYVP2GKzNTIUsB+cMUE+bDbacaqg9LjE061Dg9eBGYT4syhVIktj0jB81AZlrnkRI2J2tekNERER0L2BjkIhuy44dO5CdnT0oY2VmZiIzMxO5ubmDMt5Q4XA48Od//ufo6+vD888/j7/+67/+dq+Yzz77DH/7t3+Lv/zLv8Thw4e5PyMRERENWa6mRjS++ls4a2uETBcdjaQXfwbzhIkBqCzwuru7cOrUMXR2tgtZaGgYsrOXIy6uf0s+apqG0w1n8XHZZ3BCbKjEuDzY4g5D+rK/ghyZ2K+xKDCu13Zhx/4itPeIs03jo8x4aeNkjEkKD0Blt6apGpxFLXBcboKPrS7hijHjcEUjVNcZhOp8NLUlGVOzNiBtzjI2tImIiGjYYmOQiG7LYDUFvysrK2vQxwxmhw8fRmtrK0aPHo2/+qu/+rYpCAAPPPAAjh07hkOHDuHTTz/FE088EcBKiYiIiO6cpmnoyctFy9490FziLKDQmbOR8MxzUEJDA1BdYGmahsrKGzh7Nhcej0fIU1PHYsGCbBj6uUdal7Mb71x7B8U9VT7zBd12bBy9AmHTh+dszaHO41WxL68KB/Or4GtTmaxpSXh8xQSYDMF5q0jtc6EvpwbeZh/LY+pkVJsVnCsqgEV3HnrFLhwSGhmHBeufQXRCyiBUS0RERBS8gvPZHhERCQoLCwEAc+fOhaIoQj5//nwcOnQIR48eZWOQiIiIhhSvzYaWt3ej99wZIZP0esRteQwRS5b2e3nMocjtduHs2VxUVpYLmaIomDt3ISZMSOvX10bTNFxovoIPSj6ETRMbjxFuLx51mDB18Z9AiRl11+NQ4LR02vDa/mJUNvQIWYhRh2fWpmFuWv9mmw4kV1UX7Pl10HzshahGGHGquRtdDRcQpiuDrx+FsVMXYuZ9D0Kn71/znIiIiOhewMYgEdFPsFqtyMvLw9mzZ1FcXIyqqir09vbCaDQiPj4e06ZNw4YNG5CdnT3gN6rs9q/f9RoREeEzj4yMBAAUFxcPaB1ERERE/mSvrEDTa7+Du61VyAzJI5D00sswjhgZgMoCr729FadOHUNvr9jMiYyMwuLFyxEZGd2vMayuPrxX9B4ud5b5zGf3OLA5eREiZj8ESeEthKFG0zScLmzC20euw+mjqZaWEokXNmQgOtwUgOpuTXN5YTtXD3dFpxhKQEekEcdKyhGinIdZEY/RG0Mwb9VjGDlh+iBUS0RERDQ08Fk9EQ06t9uN7u5uREZGQqcL3l9Db731Fv7jP/4DTqe494bH48HNmzdx8+ZNfPbZZ5gzZw7+7d/+DcnJyQNWT3T01zd96urqfObffLyrqwt9fX2wWCwDVgsRERFRf2mqis5DX6Dt958AXrFhEbFkKeIe3QrZOPxm+GiahpKSQly6dBaqqgr5hAlpmDt3Yb+fS19tLcLeondhVcWlW8M8XjzUp8OsRX8OJX5sv8ahwLA53Nh9qAznSlqETJElbMoeg7WZqZDl4JyJ626ywpZbA63PLYZmHS4
6nKgqPYsw3RVIkvg7JH7URGSueRIhYZEDXywRERHREBK8d+SJaEipra0FABgMBiQkJPg8prq6Gr/4xS+Ql5cHj8cDWZaxYMEC/N3f/R0mTJgwmOXelps3b37bFExISMDChQsxefJkxMTEwOl04sqVK9i3bx9sNhsuXLiAp556Ch988AFiYmIGpJ7MzEz87ne/w8mTJ9Ha2oq4uLhvM4/Hg48//vjbf7MxSERERMHM09WFpjd2wFZSJGRySAgSnnkOYbPnBqCywHM4HDh9+ivU1dUImV6vx4IFSzB6dP8adTa3HR+WfIRzbdd85tN6HXg4dg6il22FpDP0aywKjLKaTrx+oBjtPeKbHBOizNi+cTLGJIUHoLJb07wqHJca4Sxu85nbo0w4VFEPxXseobp6IZdkBdMWbcCkOUshSdwLk4iIiOiH2Bgkon4rKCjAli1bAACPPfYYfv7znwvHNDY2YsuWLeju7ob2h53uvV4vcnNzcfHiRezcuRPTpwfX8i6SJCErKwvbtm3DggULIMvff1H54IMPYvv27Xj++edx8+ZN1NXV4d///d/xi1/8QjjX3/7t36KgoOCOxl+5ciX+6q/+6tt/L1iwADNmzMCVK1ewbds2/MM//APS09NRX1+PX/7yl6iv/+8XxT+slYiIiChY9F0rQNObO+Dt7RUy0/gJSHrxZ9AP0Butgl1TUwNyco7DbrcJWWxsPLKzlyEsrH/NnOL2MrxTuBddXruQhXhVbOoFMue/DF1yer/GocDweFXsy7uJg/nV+MPLru/JmpaEx1dMgMkQnLeDPO022HJqoHaLDU3oZVToJFwquQaL7jwUWbyGw6LiMX/dM4hO4F6YRERERD8mOJ8JEtGQ8tVXX0HTNEiShM2bN/s85he/+AW6urp87sNnt9vxN3/zNzh48CD0ev1Al3vb/uIv/uLbfft+zIgRI/Cf//mfeOCBBwAAX3zxBX7+85/DbDZ/77jGxkbcvHnzjsZvbf3+PjuSJOFXv/oVXn75ZRQWFuKJJ574NjMajfj5z3+O//2//zckSUJ4eHC++5eIiIiGL83jQdvHH6LzyCExlCREb9iImA0bISnK4BcXYKqqoqDgEq5du/ztm+i+a/Lk6Zg5c26/3vzl8Djxadnvkdt80Wee1ufElvApiNvwJCSD2ecxFNxaOm14bX8xKhvEPSktJh2eWZOGOWnxAajs1jRVg7OwBY4rTYCPhqYn0ogTde3os11GmO46fG3vPm7aIsxYsgk6/fBbfpiIiIjoTrAxSET9dvXqVQBAVFQUpkyZIuTNzc04cuQIJEmCyWTCP//zP2PZsmVobGzE//gf/wOFhYWora3FF198gY0bNw52+T/qVk3Bb6SlpWHMmDG4efMm7HY7qqurkZaW9r1j9uzZ45ea4uPj8eGHH+Krr77C+fPn0dfXhxEjRmDdunXw/mFvntTUVBgMXPKJiIiIgoeruQmNr/0OzuoqIdNFRSHxhZcQMilNfOAw0NdnRW7uCTQ3NwqZyWTGokX3YcSI/s1+Ku+6iT0Fe9DmsQqZ0atiY4+KRXOegz51Rr/GocDQNA2nC5vw9pHrcLrEvfbSUiLxwoYMRIebAlDdrXl7nLDl1sDbKs6UhSyhKVyPkyXlCNWdg1npEg4xmCyYu+oxjBw/beCLJSIiIroHsDFIRP1WW1sLSZKEZtg3jh49+u2MwhdffBH3338/AGD8+PH4t3/7N6xduxYAcPz48aBqDN6J0NDQb//+zb6EA0WWZSxbtgzLli373sc/+eQTAMD8+fMHdHwiIiKiO9GTn4fmt/dAczqEzDJjJhKffR7Kd55LDSe1tdXIy/sKLpf4/DEpaQSyspbCbA656/O7vW7su3EAJxryfU3CwnibC1vM45G0/llIpuH5PRjq+hxu7DlUhnMlLUKmyBI2ZY/B2sxUyLKPKXYBpmkaXGXtsF9sBDyqmIcbkNtmRUvjJUTor0KSxKZnQsokZK55EubQiMEomYi
IiOiewMYgEfVbW9vXm8InJCT4zM+ePfvt3x966KHvZWPGjMGUKVNQWFiIkpKSgStyALlcLlRVVX377+Tk5EGvwev1Ys+ePZAkCVu3bh308YmIiIh+SHXY0fz2bvSeyRcySadD3KNbEbF0uc+l5u91Xq8Xly6dRUlJoZBJkoQZM+ZiypTp/fraVPfUYlfBbjS7uoVMr2pY3+3G4hmPwTgu867HoMAqq+nEjgPF6OgRG8sJUWZs3zgZY5KCc4sB1eaG7XQtPPXiXqOQgI4oI46X1MIoX0SorkE8RFYwLet+TJp9HySJ+6sTERER3Qk2Bomo376ZIWcy+V6a5tKlS5AkCePHj/fZPBw1ahQKCwu/bTAONQcOHEBv79cvaCdPnoy4uLgBG+v69esYNWrU9/YwtFqt+Md//EcUFxfj8ccfR3p6+oCNfyvhQbo8Ed29b95dLssSoqMtAa6G7iW8tmgg8LoKHtbyCtz4f/8BZ1OzkJlGjMCEv/xzWMaMHvzC7oK/r6vOzk4cPvw5WlvFGV5hYeFYu3Ztv95o5lE9+KTwAD67fgTiHCwg1e7CU+ZUpD3+ChRL5F2PQ/13t9eWx6vivSNl+PhEOXxsSYkVc1Pw/MbJMBuD85aPtawNbccroTo8QiaHGXDR7kJ58TWE6i5AluzCMZGxiVjx6HbEJacORrlDDv8vJCIiolsJzmeJRDSkGAwGOBwO2GzinhA1NTVoa2uDJEmYPXu2z8eHh3/9LlaHQ1xeKth1dHTg3//937/998svvzyg47355ps4dOgQJk+ejPj4ePT29uLSpUuwWq1Ys2YN/v7v/35Ax78VReG7de9VkiRBUYbfjA4aeLy2aCDwugocTVXRsO8Aqve8A80j3vRPWLkCY154DsqPvKEsmPnjuiouLsbRo0fhdruFbMKECVi1atWPvtnudtR01eNXua+iuq9VyBRNw+puFx6c/wTCpy0bljM1g9WdXFsNbVb88p2LuF7TJWShZj1eeXQGFk0b/BVMbofX4UHrsQr0lojXJwC4Ei04WFQPyXsNYbrr8HWJTslcikXrtkBvMA5wtUMf/y8kIiKiH8PGIBH1W0xMDOrr61FRUSFkOTk53/595syZPh9vtVoB/PiMw2DlcrnwJ3/yJ2hvbwcArFixAitXrhzQMVesWIG2tjaUlZXhypUrsFgsmD59Oh555JFv92oMJK/X1/vSaSiTZQmSJEHTNKiqr92JiO4Ory0aCLyuAsvV1YWKX/0G3ZevCJkSEoKxL7+EmEULAQyt5wz+uK5cLhdOnDjuc+l8RVGwZMkSTJ06DZIk3dXXRtVU7C85hI+KD8LjYzfBEQ43nlQSMeWRP4EuIu4Pnwd/RgLtTq4tTdNw/GIddvz+Ghwuca+9qeNi8GdbZiI20hyUP1+26i60Hi6H1+oSMjlEjxKouHK5BKG689ApXcIxRnMolm5+FmPSv35NGYyfY7Dg/4X3Nr4Zl4iI/IGNQSLqt/T0dNTV1aGkpATV1dVITf3vJV1+//vff/v3zEzf+5fU1dUBAOLj4we0Tn9SVRV///d/jwsXLgAAUlJS8K//+q8DPu6KFSuwYsWKAR/nbvX0OODxiDcqaOiKjrZAUSSoqoaOjr5Al0P3EF5bNBB4XQVOX1Ehmt54Dd6eHiEzjR2HpO0/gxQbNyS/L/29rtrb25CTcww9PeJefxERkVi8eAWioqLR2SmuvnE7mm2t2F2wG1U2cdlWWdOwrMuBNZPuh2nyCvR4ZWAIfg/uVbd7bfU53Nj9ZRnOl4rLzyqyhAcXj8WaeSmQVTXofsY0jwr7xQa4Stt95s5oE45UtsLlvIEIfQEkSXwtkZiahnmrn4A5NCLoPr9gxP8L7106nYKoqJBAl0FERPcANgaJqN9WrFiBI0eOQFVVvPLKK/hf/+t/ISoqCu+99x6uXbsGSZIwbdo0JCYmCo91u90oKyuDJEkYM2ZMAKq/c5qm4R/+4R+wf/9+AEBycjLeeustRER
EBLgyIiIiosGleTxo+/0n6PzyczGUJESvXY+YjZsg6YbfS09N01BaWoSLF89AVcXZTePHT8LcuQuh1+vv6vyqpuJkbS4+K/8cbh+7CSY4PdjiicCEFX8FOUJ8Hk5DQ1lNJ3YcKEZHj1PIEqLM2L5xMsYkhQegslvztNlgy6mB6qN26GVUGiScK6qGRXcRFl2jcIgsK5i2eCMmzlwCSeIsKSIiIiJ/GX6vzojI79avX49XX30VN2/eRHl5OZ577jnhmBdffNHnY/Pz8+FwOL5tHgY7TdPwj//4j/jggw8AAImJidi1axdGjhwZ4MqIiIiIBperpQVNO34Hx81KIVMiIpH0wnaEpGcEoLLAczgcOH36JOrqqoVMr9dj/vxsjBkz/q7P327vxJ5re3DDWidkkqZhcbcD68asgmX6ekgyGypDkcer4rPcm/g8v9rnoq+Lpydh6/IJMBmC77aOpmpwFDTDWdDsc8VaT5QJx+s60d1bgwj9BciSuNd8eEwi5q97BlFxIwahYiIiIqLhJfieQRLRkKPT6fCb3/wGzz33HJqamoT8ySef/NHlLz/77LNv//5jS40GC03T8E//9E947733AAAJCQnYvXs3UlJSAlwZERER0eDqOZuPlj27oDrEG/qWadOR8Nzz0IUF5yymgdbc3IicnOOw2cQl/GJi4rB48XKE3eXXRtM0nG44h4+v/x5OTVxyMcblwRZHCNLu+zMo0Xzj2lDV3GnDa/uKcLOxV8gsJh2eWZOGOWnBuQ2Dt9sBW04NvO12MVQkNITqcbK4HmalCOH6Gz7PMX7GYkzP3gid3jDA1RIRERENT2wMEpFfjBkzBgcPHsTHH3+MCxcuoK+vD4mJiVi7di2ysrJ8PqazsxOFhYVITk6GxWLBjBkzBrfoO/BNU/Ddd98F8PV+iLt37/7efopERERE9zrV4UDL3rfRczpXyCSdDrEPP4rI5SshSVIAqgssVVVRWHgFV69ehKaJ06QyMqZh5sy5UBTlrs7f5ezGO4V7Udx902e+oNuOjSMXI2zWg5AUvtQfijRNQ+61Ruw9cgNOt9j4TUuJxAsbMhAdbgpAdT9N0zS4Sttgv9gIeMXrXw03IretF4111QjXnYNOFvfcNJpDMW/1E0geO3kwSiYiIiIatiTN1ysWIiL61g+bgnFxcdizZ8+Q2RNxMHV22uDxiDcxaOiKjrZAUWR4vSo6OsSZD0R3i9cWDQReVwPLUVONxld/C3ezuEKEPiERSS+9DFPKvfemqdu5rmy2PuTkHEdzs7hPmtFoQlbWfRgx4u5WmdA0DRear+CDko9g09xCHuH24lGbHlMXbYcSP/auxqDA+O61VdvQhV1fluFCaYtwnCJL2Lx4LFbPS4EsB1/TXe1zwZZXC0+jVQwloD3SiGOlzdChAiFKASRJ3BMzaXQG5q1+HCbL8Jxp7E/8v/DepdMpiIoKCXQZRER0D+DbCImIbuGf//mfv9cU3L17N5uCRERENGxomoauo4fR9vGH0DweIQ9flI34x56AbAq+WUyDoa6uBnl5J+B0OoUsMTEZWVlLERJiuatzW119eK/4fVzuKPWZz+mxY1N8JiKXPwpJx2UXh6rCijb8cu8ldPaK11BCdAhe2piB0YnB1zDTNA3uyi7YztYBbrHZh1A9zlkdqCyuhkV3EQZZfFOBrOgwffEDmDBj8bCcaUxEREQUCGwMEhH9hH/5l3/B3r17Afx3U3DsWL4Tm4iIiIYHT28Pmt96A30FV4VMNpkQ/9SzCM+cH4DKAs/r9eLSpXMoKbkmZJIkYfr02ZgyZQZkWb6r819tLcLeovdgVcVmUZjHi4d6Zcxc+Ap0SZPu6vwUeB6vincOleKj4zfgay2nxdOT8NjyiTAa7m752YGkOjywn6mDu1pcEhQAemNMOFzaAs3bgAj9BciSeB1HxCRh/vpnEBmbPNDlEhEREdF3sDFIRPQj/uM//gNvv/02gK9v7jz99NOorKxEZWXlTz4
uIyMDycl8cUtERERDm62kGI2vvwZvd5eQmcaMReL2n8EQFz/4hQWBnp5u5OQcQ3t7m5CFhFiQnb0cCQmJd3Vum9uOD0s/xrnWAp/59F4HHoqahuj7noBkMN/VGBR4zZ02/OKdS7hR2yVkFpMOz65Nw+xJwfnz5a7rge10LTS7OIMYZh2ueTwoLKxHiFIIk77c5zkmzFyC6dkboej0A1wtEREREf0QG4NEdFt27tyJJ554Anr94Lxwc7vdeOedd/Dss88Oyni+XLp06du/a5qGX/7yl7f1uF/84hfYvHnzQJVFRERENKA0jwft+36Pji8Owtc0pqg16xC7aTMk3fB8OVlZWY4zZ3Lg8Yj7/Y0aNRoLFy6G0Xh3y6qWdFzH29feQZfXLmQhXhWbulVkZr4A3ahpd3V+CjxN0/DVlQZ8cLwcTre4N3d6ahRe2JCBqDBjAKr7aZrbC/uFRriut/vMHTFmHKlohd3RjnDdOejkHuEYU0gY5q1+AkljMga6XCIiIiL6EcPzlRwR3bH/+3//L95++2289NJL2LRp04A1CF0uFz799FO89tpraGhoCGhjkIiIiGi4cbe2onHH7+CorBAyJTwcic9vh2XylABUFnhutxt5eV+houK6kMmyjDlzFmDSpIy72ifN4XHi99c/Q07TBZ95ep8Tj1gmIv7+ZyEZ726/Qgq8zl4n3vqiBIWVHUKmyBI2LxmL1fNSIAfhXnuelj7Ycmug9rrE0KCgXAecL2yAUa5AuO4aJEncczB57GTMXfU4TCFhg1AxEREREf0YSdN8rWRPRPR96enp3/49JiYGjz76KB566CGMGDHCL+evr6/Hhx9+iI8++gjt7e3QNA2KoqCoqMgv56fB0dlpg8cjvvOZhq7oaAsURYbXq6Kjoy/Q5dA9hNcWDQReV/3Te/4cmne/BdXuY7balKlI3PYidOHhAagssKKjLWhvb8OBAwfQ2dkp5OHhEVi8eDmio2Pv6vzlXTex+9oetLutQmb0qtjY7cHCmY/DMG7eXZ2fgsO5kmbsOVSGPoe4/GZyrAUvbEjH6MTg+/nSvCocV5vhLGwBfNw9ckeZcLy2A529PbDoLsAgNwvHKIoe05dswvjpWXfVOKc7w/8L7106nYKoqJBAl0FERPcANgaJ6LYUFhbiX/7lX3D16lUA+PYF3bRp07B8+XLMnz8fU6ZMgSzLt3U+VVVRWFiIM2fO4NixY7h27Ro0TcM3v5JmzJiB//2//zcmT548MJ8QDQg2Bu89vLFAA4XXFg0EXld3R3U60fLeO+jJOSWGioK4hx5B5IpVkG7zed69RNM01NVV4NSpk/B6xec448ZNxLx5i+5qNQ2314195Qdxov60r34Lxttc2KIfhcTsFyCHRNxF9RQMrHY33j5chnMlLT7zNQtG49n16bBZnYNc2a15O+2w5dbA2+EQQ52MOosOOaUt0EuNsOguQpbEzyEiNhkL1j2DiNikQaiYAP5feC9jY5CIiPyFjUEiuiOff/45fv3rX6OyshIAvveOT5PJhHHjxmHs2LFITExEZGQkTCYTNE2D0+lEZ2cnmpqacPPmTVRUVMDh+O8XmN/8Kho7diz+5E/+BGvXrh3cT4z8go3Bew9vLNBA4bVFA4HX1Z1z1tag8dXfwtXUKGT6+AQkbX8ZptGjB7+wIOB0OnD69CnU1lYJmU6nx/z5WRg7dsJdnbu6pxa7Cvag2dUlZHpVw7ouJ5ZMfRSGiYs4w2oIu1bZjjc/L0G3VVx+MyLUgD99dAbmTU4Kut9ZmqrBWdIKx6UmQBVvGakRRpxq6UFjhxUhSgFMSqXP80ycvRTTFm2Aohucferpa/y/8N7FxiAREfkLG4NEdMdUVcWRI0ewe/duXLx48XvZ7d64+OGvntmzZ+OZZ57BypUrefNjCGNj8N7DGws0UHht0UDgdXX7NE1D14ljaPvgPWgecWnDsAULkfDEU5BN5gBUF3jNzY3IyTkOm028jqKjY7F48XK
Eh9/5LD6P6sEXlYdxuOYriDuwAal2F7ZKiRi5+EXIoTF3UTkFA4fLgw9OVOCry/U+83np8Xhy1SSkjIgMut9ZXqvr61mCzT7qkSW0RhhwvKQZErphUc5CJ/cKh5ks4chc/QQSR6eL56ABx/8L711sDBIRkb/oAl0AEQ09sixj9erVWL16NaqqqrB//36cOHECJSUlQsPvx0iShPT0dCxduhT3338/Rg/Td6ITERERDTZvby+adr2JviuXhUwympDw5FMIX7AoAJUFnqqqKCi4hGvXLvt8XpuePgWzZmVCUZQ7Pne9tRG7C/agztEmZIqmYXWnA8vTN8GYsYxvlBvCyuu68fqBYrR0iXt1Wkw6PLlqEjIzEgJQ2U/TNA2u8k7Yz9cDbrFtrYUacLbXhpvFHTDK5QhRCiFJ4nHJ46Zg3qrHYTSHDkbZRERERHQXOGOQiPyms7MTBQUFKCsrQ11dHdrb22G3f/2C2Gw2IzY2FiNHjsSkSZMwdepUREVFBbhi8jfOGLz38B3HNFB4bdFA4HV1a32F19D01uvwdncLmTF1NJK2vwxDQvA1LQaD1WpFbu5xtLQ0CZnJZMKqVasRGXnnXxtVU3G0+gQOVB6G18dugiMcbmz1RmH04pcgRwzPr/29wO1R8VnuTXxxthq+7rJMGRON59alIyrM+O3HguV3lmp3w5ZfB09tj8+8J8aEw6Ut8Hj6EKq7AL0s7peo6PSYcd9mjJu6kI3tAAuW64r8jzMGiYjIXzhjkIj8JioqCkuWLMGSJUsCXQoRERERfYfqcqHtow/Qdfyozzxq9RrEPvgwJN3wfIlYXX0T+fmn4HI5hWzkyFFYv34dzOaQO77J3mxrxe6CPaiyic1GWdOwrNOONePXwjRtLSRZvuv6KbBqW6zYsb8Yda1WITPoZWxZNgH3zUgOyoaZu6Ybtvw6aA5xSWGE6HDV5UFxYRP0UgMi9BchS+J+iZHxI7Fg3TMIj2Zjm4iIiGgoGJ6v+oiIiIiIiIYJR001mna8Cldjg5ApYeFIfP4FWKZMC0BlgefxeHDhQj6uXy8RMkmSMGPGHGRnL4Rer4PX62tXQN9UTcXJ2jx8Vn4Qbh+7CSY4PdjismD8sj+HEj2iX58DBY6qajh0rgaf5lTC4xWnCY4bEY4XNmQgIQhn+GguL+znG+Aq7/CZ22PMOHyjBTanEyFKAUzKTZ/Hpc1ZjikL10HR6QeyXCIiIiLyIzYGiYiIiIiI7kGaqqLz0Jdo+/3HgFdc6tsydRoSnt0GXUTk4BcXBDo7O5CTcwxdXZ1CZrGEYvHi5YiLS4B8hzP52u2d2FP4Nm701gqZpGlY3GXH+pSlCJn9ACSZL8mHqpZOG14/WILyOnFZXkWWsCl7DNZmpkKWg2+WoKfJClteLVSrOPsPRgXXZQ0XCxuhSF2I0J+DIvUKh5ktEchc+yQSUiYNQsVERERE5E98FUJERERERHSPcbe3o+nNHbCXlQqZZDAg7pGtiLhvaVAubTjQNE3D9esluHAhH14fDdPU1LFYsCAbBoPRx6N/+rynG87h4+u/h1MTzxvj8mCL3Yi0xX8NJW703ZZPAaZpGk5ebcD7x8rhdIvf55FxFrywIQMpCWEBqO6naV4VjstNcBa1+szd0SYcre5Al9UJk3wDZqUQkuRjX8zx0zB35WMwmi0DXTIRERERDQA2BoloQJSUlODixYtobGxET08PvF4v/vVf/zXQZRERERHd83rOnkHL27ug2u1CZkxJRdKLL8GQlByAygLP6XQgP/8UamqqhEyn02Hu3IUYP37SHTdMu5zdeKfwXRR3V/rMF3TZcX/SfISveASSznA3pVMQ6LI68dbnpbhW2S5kEoA1mSnYlD0Wel3w7Rfp6bDDllMDtcshhjoZNSEK8oqaIMGOMN0F6OUW4TBFZ8DMpZsxdsqCYfmmAiIiIqJ7BRuDRORXX375JX7961+joqLi249pmgZJkoTGYFtbGzZt2gSv14vZs2f
j17/+9WCXS0RERHTP8Nr60PLOHvSePSOGkoTotesRs3ETJN3wfBnY3NyInJzjsNn6hCwqKhrZ2csRGRl1R+fUNA0Xmq/gg5KPYNPcQh7p9uIRq4wpC1+BLolLLg5l50qasedQGfocHiGLizTh+fUZmDgqcvALuwVN1eAsaoHjSjOgirP/1EgjvmrqQXOtHXqpHhbdJciSuMRoVMIoLFj3DMKi4gejbCIiIiIaQMPzFSERDYif//zn+PDDDwF8fZPkVmJjY7FgwQLs378fx48fR3NzMxISEga6TCIiIqJ7jq20BE1v7oCno0PIdDExSHx+O0ImDs/GlKqquHbtMgoKLvl8jpqWNhmzZ2dCUe7s5XGvy4r3ij/AlQ5xuVYAmNNjx6bomYhc9jgkvemuaqfAs9rdeOfIdZwtbvaZ3zcjGY8uGw+TIfhur3h7nLDl1sDbahNDWUJLhAHHS5qhaR6EKFdhUqp8nEVC+tzlmLxw3R3/jBARERFRcOKzOiLyi//4j//ABx988O2/s7KyMH/+fOTk5ODs2bM/+rhNmzZh//79X+/VcfIkHn300cEol4iIiOieoLrdaP/sU3Qe+gLw0fQKW7AQ8Y89CSUkJADVBV5fnxW5uSfQ3NwoZEajEQsX3odRo1Lv+LxXW4uwt+g9WFWnkIV5vHioR8PMzO3QjZp6V3VTcCisbMebn5egyyrOoIuwGPDcujRMGxcbgMp+mqZpcN3ogP18A+BRxTzMgPwuG6qLO6FInQjVn4MiWYXjzKGRmL/2KcSPmjAYZRMRERHRIGFjkIj6raqqCm+88QYAIDw8HL/61a+QmZkJAGhsbPzJxuD8+fNhNpvhcDhw9uxZNgaJiIiIbpOzoR5NO16Fs7ZGyOSQECQ8+QzC5mUGoLLgUFNThdOnT8LlEpt3CQlJyM5ehpAQyx2d0+a248PST3Cu9arPfHqvA5vD0hFz/9OQjHd2bgoeTpcXH5wox4nL9T7zOWnxeHr1JISa9YNc2a2pNjdsp2vhqe8VQwnoijbhSEkzPF4VJvk6zEoRJEl8U8HICTMwZ8UWGM28jomIiIjuNWwMElG/vf/++/B4PJAkCf/yL//ybVPwdiiKgkmTJuHKlSsoLy8fwCqJiIiI7g2apqHr+FG0ffQBNLe4r505LR2J216EPjo6ANUFnsfjwcWLZ1BWVixkkiRh+vTZmDJlBmRZvqPzFjSX4Lf5b6LLIy7LGOJVsanLg3lznoF+zOy7rp0Cr7y+G68fKEZLp13IQow6PLl6IjLTEyBJUgCq+2muqi7Yz9RBc3rFMESPy04XSgubIMGGMN0F6OVW4TCd3oBZyx7B6Ix5Qfk5EhEREVH/sTFIRP125swZAEBKSgpWr159x48fMWIErly5gqamJn+XRkRERHRP8XR1oemt12ErKhQySadD7OaHEbliFaQ7bHrdK7q6OnDq1DF0dXUKmcUSiuzsZYiPT7yjczo8Duy9+DGO3DztM0/vc+IR4xjEbdgG2Rx+V3VT4Hm8Kj7LvYnPz1T7WpUXk8dEY9u6dESFGQe/uFtQXV7Yz9bDXSle9wBgizHj0PUWOFxe6KV6WHQXIUvimwqiE1Mxf+3TCIuKG+iSiYiIiCiA2Bgkon5raGiAJEmYNm3aXT0+NDQUANDX1+fPsoiIiIjuKb2XLqJ591tQreJeYIYRI5H0wkswjhoVgMoCT9M03LhRivPnT8PrFWdLpaaOxYIF2TAY7qypU9FVhT15b6PV2SNkRq+KjV1OLJy+FfoJCzm7agira7Fix4Fi1Lb4+NnSy9iydDzumzkiKL/H7sZe2HJrodnERh9MCkqh4XJhIwAPLMpVGJUqH2eRkJG5EpPnr4WsKANcMREREREFGhuDRNRvNtvXyymFhITc1eMdDgcAwGgMvnffEhEREQWa6nCg5b130JOb4zOPXLkasZsfgqw3DHJlwcHpdOLMmVOorr4pZIqiYO7chZgwIe2Omjoe1YODFYdwpPYkfEw
ew3ibC1vkJCSufRFy6PBcsvVeoKoaDp2vwaenKuHxit/pccnheGFDBhKi7+51zkDSPCrslxrhKmnzmbtiTDh6sx3dfW4oUgdCdeegSOIbMUPCopC59inEjxw/0CUTERERUZBgY5CI+i0yMhJtbW3o7PS9dM2t1NTUAACih+k+OEREREQ/xl5RjqbXX4O7tUXIlMhIJG57EZaMyQGoLDi0tDQhJ+c4+vrEmV6RkdFYvHg5IiOj7uicDdYm7Ly2G/V2seGiVzWs63RgccaDMGYsDcoZZHR7WrrseONAMW7UdQuZIkvYlD0GazJToAThsryeNhtsuTVQu51iqJdRZVKQX9gEQINJLoNZKYYkiY3PUZNmYc7yR2EwBV/jk4iIiIgGDhuDRNRvKSkpaG1tRUFBwR0/trOzE4WFhZAkCWlpaQNQHREREdHQo3m9aD+wDx0H9wOqKuShs+cg4alnofxhSfbhRlVVFBZewdWrF6H52BBu0qTJmDMnE4py+y95VU3FV7W5+Kz8c3ggfs1T7S5sVWMwcvXfQA6P71f9FDiapuHU1Qa8d6wcTre47OyIOAte3JCBlISwAFT30zRVg/NaMxxXm+FrKqs3yoQTDV1orXFAhg0W3XnoZbHBrTMYMXvZI0hNn8vmNhEREdEwxMYgEfXbokWLcPHiRTQ3N+Po0aNYsWLFbT/2tddeg9vthiRJWLhw4QBWSURERDQ0uJqb0fTGq3BUVgqZbDIh/vGnELZg+O5p19dnRW7uCTQ3NwqZwWDEwoVLkJIy+o7O2enowu7Cd3C9p1rIFE3Dyk47Nk7bBHX8MkhBOIOMbk+X1YmdX5SioKJdyCQAazJTsCl7LPS64Psee7sdsOXWwttmE0NFQlO4HieKmwAABrkWIcplyJK472BM0mjMX/s0QiNjB7pkIiIiIgpSbAwSUb9t3rwZr776KlwuF/7pn/4JaWlpGDly5C0f9+mnn2Lnzp2QJAnh4eF44IEHBqFaIiIiouCkaRp6ck6h5f290JziEoGmceOR9MJL0MfFBaC64FBTU4XTp0/C5RK/PgkJScjKWgqL5fZnUWqahvNNl/F+6UdwaB7xnE4PnnCFYNZD/xNKzCh0dIh7tNHQcL60Bbu/LEWfQ/w+x0aY8MKGDEwcFTn4hd2CpmlwlbbDfrEB8LEPohZuRF6HFbXFnQDcsChXYFRqhOMkSUJG5mpkzF8NWVYGoXIiIiIiClZsDBJRvyUmJmLbtm347W9/i7a2Njz88MN45ZVXsH79euFYp9OJS5cu4d1338WRI0egaRokScKf/umfIiSEe1sQERHR8OTp7UHzrrfQd+WyGCoKYu5/ANFr10NShucNfa/XgwsXzqKsrEjIJEnC9OmzMWXKDMh3MJuvz23De8Uf4FJ7sc88q9OGDckLkbL6WeiMRni94vKiFPz6HG68c+Q6zhQ1+8wXT0/GlmXjYTYG3+0Rtc8NW14NPI3iHpqQgM5oE44UN8OralCkdoTqzkORxOa1JTwamWufRtyIsYNQNREREREFO0nztSEDEdEd0jQNf/Znf4bDhw9/b1krvV4Pl8sFSZIQGhoKq9X6vccAwIMPPohf/OIXg14z+V9npw0ej7hXCw1d0dEWKIoMr1flLAnyK15bNBCG6nXVd60ATW+9Dm9Pj5DpExKR9MJ2mMYM3xv6XV2dyMk5hs7ODiGzWEKRnb0M8fGJd3TO4vYyvF24F91eu5BFuL14tBeYsvAF6JLTh+x1RUDhzXa89XkpOnvFGaYRFgOeXZuG6eMDt6TmT11brspO2M/WQ3P5eG5t0eOC3Ykb9T0AVJjkMpiVEkiSeHsnNW0OZi1/BAajeYA+Cwo2/J1179LpFERF8Q3VRETUf8H3ljgiGpIkScJ//ud/4le/+hVee+01eL1fv4D9Zv9AAOjt7f3eYxRFwcsvv4xXXnll0OslIiIiCjTV6UTrRx+g+8Qxn3nEkqWIe3QrZKNxkCsLDpqmoby8DOfO5X3
73PK7UlLGYMGCxTDewdfH5XXh0xsHcKrhjM98Zo8Dm8PTEbXxGUgG3nwdqpwuLz74qhwnLtX7zOekxePp1ZMQatYPcmW3pjo9sJ+ph7uqy2dujTHjcFkznG4VMvpg0Z2HXhb3TNQbTJi1/BGMTp87wBUTERER0VDDxiAR+Y0sy/izP/szPPzww9i1axdOnTqFqqoq4bikpCTcd9992LZtG0aNGjX4hRIREREFmKO6Ck2vvwZXY4OQKWFhSHhmG0JnzAxAZcHB5XIiPz8H1dWVQqYoCubOXYgJE9K+t1LFrVT31GLntT1ocXYJWYhXxaZOF+bNeRL6sfP6UzoFWEV9N14/UIzmTnE2aIhRhydXTURmRsIdXTuDxV3fA1teLTS7uA8izDoUqV4UFDYCAAxyDUKUy5AlH3smJo/F/LVPwRIRM9AlExEREdEQxKVEiWhAdXV1obW1Fb29vQgJCUFMTAzi4uICXRYNEC4leu/hUkQ0UHht0UAYCteVpqroPPQF2n7/CeBjFpxl2nQkPLMNuoiIAFQXHFpampCTcxx9feK+apGRUVi8eDkiI6Nv+3xe1YtDVcfwRdVR+NolcGKfE48qyUhYsh2yJUrIh8J1RYDHq2Jf3k0czK+Gr7sck0dH4bl16YgONw1+cT/im2vL7XCj4Ug5XGXizD8AcMaYcaSyFb02DyS4EaJcgVGpEY6TJBmTF6xB+ryVkOXhuR8p8XfWvYxLiRIRkb9wxiARDajIyEhERkYGugwiIiKigHO3t6HpjR2wXy8TMslgQNyjWxGxZGlQzmQaDKqqorDwCq5evQhf71+dNCkDs2fPh053+y9jm22t2HXtbVT3NQqZXtWwrsOOJekbYZiyApIk96t+Cpy6Vite31+MmhaxmWzQyXh02XgsnTkiKH+27A09aP78OtxdDjE0KKg0SDj7h1mCOqkNFt15KJJNONQSEYP5a59GbPKYgS6ZiIiIiIY4NgaJiIiIiIgGWM+Z02h5Zw9Uu7i8oTF1NJJefAmGxKQAVBYcbLY+5OaeQFOTuLSqwWDEwoVLkJIy+rbPp2kacurz8emN/XBp4szMkQ43trpCkbLyL6BEJfendAogVdVw+HwtPjlVAY9XbCaPSw7H8xsykBgdfDNsNK+KjtM16DpXB/iY4eiJMuFEfSfaup0AVJiVUpjkEvjqbY7OmIdZSx+C3mge8LqJiIiIaOhjY5CIiIiIiGiAePv60PLOHvSeOyOGkoTodRsQc/8DkO5gFty9pra2GqdPfwWn0ylk8fGJyM5eBosl9LbP1+3swdtF76K4q0LIZE3Dsk4bVqfcB/PsByEpw/frPtS1dtnxxoFiXK/rFjJFlvBA1hisnZ8CRQ6+maCeDjvsebXwdohvFIAioSFMj5PFTQAAGVZYdOehlzuEQ/VGM+as2IKUSbMGumQiIiIiuofwVRAREREREdEAsJWWoOnNHfB0iDf0dbGxSHp+O8wTJgagsuDg9Xpw8eI5lJYWCpkkSZg2bRamTp0J+Q4aO5dbruHd4g/Qp4pNxliXB1t7FUzI/nPoEif0q3YKHE3TkFPQiHeP3YDTJc4GHRFrwQsbMpCaGBaA6n6a5lXhKGiG81qLz1mCaoQRuW29qK/rBKDBINfAolyBJHmEY+NGjEPm2qdgCb/9/TaJiIiIiAA2BonIzy5cuIDPP/8cBQUFqKurg9VqhdcrvmD3RZIkFBcXD3CFRERERANLdbvR/vtP0Hn4S8DHXnnhCxYh7rEnoIQE3/KGg6W7uwunTh1DZ2e7kIWEWJCdvQwJCbe/tKrdY8cHpZ/iXMsVn/n8Lhs2Rs9A+LInIBm43OJQ1W11YucXpbhaIV43EoDV81Lw4OIx0OuUwS/uFjytfbDl1ULtFpvWkCW0RxlxrLgZXlWDBBdClMswKnXCoZIsY8qCdUibu+KOmuZERERERN9gY5CI/KKzsxN/+7d/i9zc3G8/pvm4EUZERER0L3PW16Pp9d/BWVsrZHK
IBQlPPYOwufMCUFlw0DQN5eVlOH/+NDwecRZUSspoLFiwGEaj6bbPeb2zArsL30Gn2ypkYR4vHun0YFrmc9CPnt2v2imwLpS2YPehMljtbiGLjTDh+fXpmJQSFYDKfprmUeG43ARnSavPWYJKhAnn+xwoK/x66VCd1AaL7jwUySYcGxoZh/lrn0ZMUupAl01ERERE9zA2Bomo35xOJ55++mmUl5ezGUhERETDkqaq6Dp+DG0fvQ/NR8MrJD0DCc+9AH308F32z+Vy4cyZU6iqqhQyRVEwZ84CTJyYDkmSbut8bq8b+yq+xIm6HF/9Fky1OvCQIRUx978AOSSin9VToNgcbrxz5Dryi5p95ounJ2PLsvEwG4Pv9oanyQrb6VqovS4xlAB7chi+KGj4w5KoKsxKCUxyKXz9CIyZPB8zlz4EvcE44HUTERER0b0t+J45E9GQs3PnTty4cQOSJEFRFGzevBnr16/HxIkTERERAUUJvqV8iIiIiPzF09WJprfegK3Ix155Oh1iNz+CyBUrIQ3jZf9aW5uRk3McVmuvkEVGRiE7ezmiom6/aVrX24Cd1/ag0SEuKWn0qtjUYUfm1IdhSL/vthuNFHyKbnbgzc9L0NkrLr8ZbjHg2bVpmDE+NgCV/TTN7YX9YiNcZeL1CQCIMOKy3YnSC1/PLJZhRajuHHRyp3CowRiCOSu3YNTEmQNZMhERERENI2wMElG/ffHFF9/+/d///d+xdu3aAFZDRERENHh6L15A8+63oPb1CZlhxEgkvfgSjCNHBaCy4KBpGgoLr+DKlQs+V5aYODEdc+YsgE53ey9NVU3F0eqTOFD5Jbw+5gmOtbmwxRuJ5DV/DTkisd/1U2A43V58dKICxy6Je+wBwOxJcXh69SSEhRgGubJbc9f3wJZfB61PXPIUsoTuODOOFjfD5fIC0GCQq2FRrkKSxJnG8SPHI3PtUwgJC74lUomIiIho6GJjkIj6raamBpIkYdq0aWwKEhER0bCgOuxoeXcvevJyfOZRK1cjZvNDkPXB17gYLDZbH3Jzv0JTU72QGQwGLFiwBKmpY277fG32DuwufAcVveL+jTpVw+qOPiwduxKmWRshyVyxYqiqaOjG6wdK0Nwh7rFnNurw5KqJmJ+REHQzQVWnB44LDXCVi7P+AABRJpzrtqPiSgMAQIILIcolGBXx50OSZUxduB6T5iyHPIxnGhMRERHRwGBjkIj67Zt3eE+cODHAlRARERENPHv5DTS98Rrcra1CpouKQuK2FxGSnhGAyoJHXV0N8vK+gtPpELL4+ERkZS1DaGjobZ1L0zScabyAD8s+hVMTZ1UlOd3Y2qfHmPv+Gkr82H7XToHh8arYl1eFg/lV8LVtecboKGxbl47ocNPgF3cL7ppu2M7UQbOL1ycUCe3RJhwvaoLH+/UnppeaYNFdgizZhcPDouIxf93TiE5IGeiyiYiIiGiYYmOQiPptxIgRKC0thdMp7v1BREREdK/QPB60H9iHjoP74atzETpnLhKefAbKbTa87kVerxeXLp1FSYmP/RYlCVOnzsS0abNuexZUr8uKvcUfoKCjVDyfpmFJlw1r4+fCsuIxSHpjv+unwKhvtWLHgWLUNFuFzKCT8cjS8Vg6awTkYJsl6PDAfrYe7qou33m0CafbrKgt+GYWoRshSgFMSpXP48dOXYiZ9z0IHa9lIiIiIhpAbAwSUb9lZ2ejpKQEBQUFgS6FiIiIaEC4mpvQ9PprcNysFDLZZEL8408hbMHCoFvecDB1d3fh1Klj6OxsF7KQEAuyspYiMTH5ts93ra0Y7xS9j16vOKsqyu3Fli4P0hdshy5ler/qpsBRVQ2Hz9fik1OV8HhVIR+bHI4XNmQgMTokANX9OE3T4L7ZBfu5emhOr3iATkZThAEni5uhql+/iUAntcCiuwhFEpdINZotmLNiK0ZO4LVMRERERAOPjUEi6rcnnngCO3fuRHV1NY4ePYoVK1YEuiQiIiI
iv9A0Dd05J9H63l5oLpeQm8ZPQNLz26GPiwtAdcFB0zRUVFzHuXN58HjEpRRHjRqNBQsWw2S6vSUgHR4nPr6xD6cbz/vM53TbsckyDhEbt0E2h/erdgqcti47Xj9Yguu1XUKmyBI2Zo3BuvkpUIJsjz3V5obtTB08tT0+c2+MGTmN3Wis7fjDRzwIUQphUip8Hj9q/GTct/k5uDzDdz9SIiIiIhpcbAwSUb8lJCTg5z//Of6//+//w//8n/8T4eHhmDdvXqDLIiIiIuoXT28Pmne9hb4rl8VQURCzcROi166HFGSNi8Hkcrlw5kwOqqrEpocsK5g7dwEmTky/7ZmUld1V2HXtHbS5uoXM4lGxucOB2TO3QDcxa1jPzhzKNE1DbkEj9h67AadLnG03ItaCFzZkIDUxLADV/ThN0+Aq74D9fAPgFmc3wqCgzqIgp7Dx2w/ppDZYdBegSH3C4Tq9AQvXbsHU+Uuhqho6OsRjiIiIiIgGAhuDROQXDz/8MIxGI/7hH/4Bzz77LFasWIFVq1Zh/PjxCAsLu+0bN8nJt7+8FBEREdFAsRZcRfPON+DtEWcF6RMSkfTiSzCNHhOAyoJHa2sLcnKOwWrtFbKIiCgsXrwcUVHRt3Uuj+rB5zeP4HD1CYi7NwLpfU48osUibt3fQQ4fvrMzh7ruPhd2fVGKK+VtQiYBWD0vBQ8uHgO9Thn84n6C1+qC/XQtPI3iHogA4Ik140RNJ9qqHd88AmalCCb5Bny9DIobOR7zVj+BlDEpf3id5OuqJyIiIiIaGGwMEpHfbNiwAY2Njfh//+//4ciRIzhy5MgdPV6SJBQXFw9QdURERES3pjqdaP3ofXSfOO4zj7hvGeIe2QLZaBzkyoKHpmkoKrqKy5fPQ9PEhsbEiemYM2cBdLrbe7nZ2NeMXdfeQa2tScgMqooN7TYsmrgWxunDe3bmUHehtAW7D5XBancLWWyECc+vT8eklKgAVPbjNE2Dq6wd9ouNgMfHLEGjgptGGWeu/fcsQUXqQKjuAhRJbJgrOj2mZW3EhJnZkCRey0REREQUGGwMEpFfNDY24sUXX0RFRcW3swN93SgiIiIiClaOqio0vv47uJvEBpUSFo6E57YhdNqMwS8siNhsNuTlnUBjY72QGQwGLFiwGKmpY2/rXKqm4mTdaXxWfgBuTWy6pNpd2Go3Y8Tyv4MSm9rv2ikwbA433jlyA/lF4s8VAGRPS8LW5RNgNgbX7QlvtxO207Xwtvhe4tMZa8axm+3otn6z96gXZqUEJvk6JEl8HRSTNBqZa55EWFT8AFZNRERERHRrwfXMm4iGpO7ubjzxxBNoaGj43sfNZjPCw8OhKMG1FBARERHRd2mqis4vP0fbZ58CXnHPM8v0GUh4Zht04eEBqC541NfXIC/vKzgcDiGLi0tAdvYyhIbe3r5wnY4u7Cl6F2XdN4VM1jSs7OjDiuSFMK9+FJLO0O/aKTCKqjrw5sESdPY6hSw8RI9n16ZjxoTYAFT24zRVg7O4FY4rTYDXxxsdzTqUSRoufW+WYBcsynnoZHHpYVlRMHXhBkycvRQyZ7wSERERURBgY5CI+u31119HQ0MDJEmCyWTCSy+9hPXr12PUqFGBLo2IiIjoJ7nbWtH0xg7Yb1wXMslgQNyWxxGxeMlt75d8L/J6vbh06RxKSq75zKdNm4Vp02bddtPjQtNlvFf6MeyqS8jinR5s7dEwbtEfQTdySr/qpsBxur346KsKHLtY5zOfPTEOT62ZhPCQ4Gr6ejvtsOXVwttu95nbYs04eqMVfQ7PHz6iwiSXwayU+JwlGJWQgsw1TyAiJmkAqyYiIiIiujNsDBJRvx09ehTA13sE7tixA3PmzAlwRUREREQ/TdM09J45jZZ39kD1MQPOOHoMkl54CYbExABUFzx6erpw6tRxdHS0CVlIiAVZWUuRmJh8W+fqc9vwfuknuNha4DNf1GXDhvA0hD3wLCRTaL/qpsCpbOjB6we
K0dRhEzKzUYcnV07E/MkJQdVs17wqnIUtcBS0AKqPWYIhehR6PLj2vVmCPX+YJdglHC7LCjLmr0H63BWQuXoKEREREQUZNgaJqN/q6+shSRJmz57NpiAREREFPa/Viua3d8N64ZwYShKi129AzIYHIOmG78slTdNQWXkDZ8/mwuPxCPmoUalYsGAJTCbTbZ2vpOM69hS9i263uF9bhNuLRzqcmDL3CejGzQ+qhhHdPo9Xxf68KhzMr4bqY6/x9NQoPL8+HdHht3fNDBZPuw22vFqoneIbBACgN8aMo9db4HB9s8ywBpN8HWalGJIk7o0ZEZuMzLVPISpuxABWTURERER094bvK10i8huz2Qy3240xY8YEuhQiIiKin2QrKUbTmzvg6ewUMn1sHBKf3w7zhAkBqCx4uFwunD2bi5s3y4VMlhXMmTMfkyZl3FYDz+V14ffln+Nk/Wmf+YxeBzYrSYi6fzvk0Jh+106BUdPcizcOlqC2xSpkBp2MR5aOx9JZIyAHUdNX86pwXGmGs6gF8DFJEBY9LjlcKCv871mCMnoRqrsAndwhHC5JMtLnrUTG/NVQFN5qISIiIqLgxWerRNRvycnJ6OnpQV+f+A5wIiIiomCgut1o//RjdB7+0mcevjALcY89AcVsHuTKgktbWwtOnToGq7VXyCIiIrF48XJERd1eA6+mpw47C99Gs0Nsopi9Kja12TBv8kbop66CJN3e/oQUXDxeFZ/nV2P/6Sp4fSzBOSYpHC9sSEdSjCUA1f04T0vf17MEe5xiKAFdUSYcLWuB2/PNjEANRrkcIUqhz1mC4dEJmLfmScQkpg5s4UREREREfsDGIBH12/Lly1FSUoJLly4FuhQiIiIigbO+Do07XoWrrlbI5BALEp5+FmFz5gagsuChaRqKigpw+fI5aD6WgZwwIQ1z5iyAXq+/5bm8qheHq0/g85tHoPqYijXB5sSjzjAkrP4zKNGj/FI/Db66FiteP1iMmmZxlqAiS9i4aDTWLUiFIgdP01dze2G/3ARXibhnJgBoYQac67Ghsqjp24/JsMKiuwi97OsxEtLmLMOUheug6G79s0FEREREFAzYGCSiftuyZQv27NmDpqYmvPfee9i6dWugSyIiIiKCpqroOnYEbR9/CM3HPnkh6ZORsO0F6KOiAlBd8LDbbcjNPYHGxnoh0+sNWLBgMUaPHntb52qxtWJX4V5UWcVz6VQN69utyE5ZAtO8hyApbKQMRV5VxednarAv96bPWYIj4yx4fn0GUhPDAlDdj3M39sJ+ug6q1SWGsoS2cANOlLXA8+3npMEoVyJEuQZJ8goPCY2MQ+aaJxCbfHs/G0REREREwYKNQSLqt7i4OPzyl7/EK6+8gv/zf/4PDAYDNm/eHOiyiIiIaBhzd3ai+c3XYSspEjJJp0Psw48ictkKSEE0mykQ6utrkZf3FRwOu5DFxSUgO3sZQkNv3eDRNA25DWfxyfV9cGliE3akw40tvTJSsv8UuuR0v9ROg6++1Yo3DpagqklcalaWJKxbkIqNi0ZDpwTPz5Xm8sJ+sQGu6+KStgCghhtwur0PtSX/ve+oDBssugvQy60+HzNh5hJMy7ofOr1hQGomIiIiIhpIkuZrnRgiojtw/vx5AEBxcTF++ctfwu12Iy0tDatXr8aECRMQFhYGSZJu61xz5w7vZbyGus5OGzwe8R3VNHRFR1ugKDK8XhUdHdxHlPyH1xYNhG+uq5acPFT816tQbeK1ZRg5CkkvvgTjiJEBqDB4eL1eXL58HsXFBT7zqVNnYvr02ZBvo3Ha7ezFOyXvo6jjupDJmoalnTasip4GS9ZTkAwh/a59sPH31dezBL88W4PPcm/C4xVvIYyItWDb+nSMSQoPQHU/zl3XA1t+HTSbWwwVCU0WPb4qa/nOgrcaDHIVLEoBJElscFsiYjBv1eOIHzXBL/Xx2qKBwOvq3qXTKYiKGnr/jxIRUfDhjEEi6rennnrqe40/TdN
QWlqK0tLSOzqPJEkoLi72d3lEREQ0THhsNlS+uRMtx0/4zKNWr0HMpocg38Y+efeynp5u5OQcQ3u7uGea2RyC7OxlSExMvq1zXWm5hr0lH6LP6xCyGJcHWzrdmJj5DPRj5/W7bgqMhrY+vHGwBDcbe4RMkoB181OxcdEY6HXBM0tQdXhgP98Ad2Wnz9wbYcSpph401f13LsEOi+4SDHKTz8eMm7YI0xdvgt5gHJCaiYiIiIgGCxuDROQXP5x8zMnIRERE9F2axwPN44bqdkP7zh/V5Ybm+f7HNLcbqtv1g39/83eXj499/cfb0Q53d7cwti4qConbXkRIekYAPvPgUlFxHWfP5sHjEWdQjRyZgoUL74PJZLrleeweOz4s+wxnmy/5zDO7bdhoTEX4xhcgW4b3Ho5DlapqOHS+Bp+eugmPVxXypJgQPL8+A2OTg2uWoKu6C/Yz9dAc4ow/6GTUmmTkljZ/54MaDHItQpQrkCXx58IcGol5qx5D4mgugUtERERE9wY2Bomo37j8JxERUfDTNO3r5twtmms//LevJp3mcUN1ffdj32n6ffPxHzQBoYqNhcEQOmceEp58GkpoaEDGDxZutwtnz+ahsvKGkMmyjNmz5yMtbfJtLf9+o7MSu4v2osMlziAL83jxUJsN06dthn7yckhS8Mwio9vX2N6HNw+WoKLB9yzBNfNSsCl7DPQ6JQDV+aba3bCfrYe7WnxzAAC4I4w4Ud+F9l7ntx+T4IBFdxkGucHnY0ZPzsTMJQ/CYOLSfURERER072BjkIj6bc+ePYEugYiI6J6kulzou1YAd1vr1w22PzTd1B/MsLvdWXfDiWw2I/7xpxA2f8Ft73V8r2pv///Z++/guO4zz/99d0TOkQCYwAQwgjmTYlCkRAUrR0uyNbM7nt07NXW9VVtbs7O7VT//bs3M9fz2zsyObVnRVo5UoBKDmHNOAAECJHLOnfuc+wctWdI5oBgQyc/rH4t4Tp/zdPG4ie5PP99vM9u2baK72xryJCensGLFWtLTM370PGEjwscVn7Op+mvs1oaY1hPg/mgqGbf/La60y1uKVIYXwzD58kA17207RzhiDfNz0+N5dl0xE/JThqA7e6ZpEj7XgX9/LWbQZq9rj5NKt4M935sSBI+jhgT3YZyOkOUhsQnJzL/5YfIKpw9U2yIiIiIiQ0bBoIiIiIjIMOSvKKfhd78h3NI81K2MOEnFRWQ+9SyezKyhbmVImabJ6dPHOXRoH4bNxObEiVOYP38JnsvYc7G2p56XTvyROl+TpRZjGKxv7mHRhLXEzL0Hh0tvM0eixjYfv//0NOU11ok7B3DLgtHcu7wQr2cYTQn2hvDtriFS221bD6bEsPlCGx2+P38xwkGQeNcRYlw1to8ZUzSXOavuJyYuYUB6FhEREREZanrHJiIiIiIyjJiGQdunH9O64YMhW35z0DgcODweHG4PDq8Hp8fz5z//6b+dXq/1Z56Lx3/zs28el5SaRPzofOLGj6e93TfUz25I+f1+du3aSm1ttaXm8XhYtGgF48dP+NHzGKbBpgvb+KjiM6JY78fx/hAP9XgYtepvcedO6pfeZXAZpsmmAzW8+3UFIZspwey0OJ5dV8ykgtTBb64PpmkSOtuG/0AdhG1eJ70uyjA4aJkSrCPBfQinI2h5SExcInPXPMjoySUD1LWIiIiIyPCgYFBEREREZJgIt7bS8Pxv8J8tG7yLulw43N8J5Tw/COC+92fvjx7z/Z95bY75889wufp1mc/09ARcLifR6HUeqP6Iuroadu7cgt/vt9QyM7NZvnw1SUnJP3qeVn8br5x8g/KuKkvNZZrc2trDqux5xN38KA5vXH+0LoOssd3Hi5+cpqyPKcG180Zz38pCYobRlGC0O4h/Vw2Rhh7bui8lhk2VrfQEI9/+zEGIeNdRYlwXbB9TMGkWc9c8SGx80oD0LCIiIiIynCgYFBEREREZBroP7KPxlZcwfNZJN29+AZ7
s7IsB2zeTc15vHxN2Nj/70/E/nLBzeDw4XMPnA3+5NoZhcOTIAU6cOGJbnz69hJKSeTidzkuexzRN9jYc5K3S9wka1r0pc4NhHm6PMm7Jz/CMm9sfrcsgM0yTzQdreOfrCkI2E3fZqXE8s66YyaNTB7+5PpiGSehMC/7DDWAz2UiMixPhCMctU4INf5oStAbl3ph45qx5gDFT5tzwe5GKiIiIyI1DwaCIXJa6urrv/TkvL6/P2rX47nlFRERuBEYgQNMbf6Rrx3Zr0eEgY/09pN9xpwI8uaTu7i62b99MS4t1D8C4uDiWLl1FXl7Bj56nJ9TLa2fe4WjLSUvNYZqs6PBxW8IEEu95Fmd8Sr/0LoOrqcPPi5+cprS6w7a+Zm4B96+cQIx3+LzmRDsD+HZWE222XyK4O8XLVxUtBL4XcoaJdx0j1lVl+5i8wmnMW/swcYm6j0VERETkxqJgUEQuy+rVq7/9Fq3D4eDUqVO2tWvxw/OKiIhc7wJVVdT/7t8JNzZYau6MDEb97C+Jm6R92+TSKisr2LNnG+GwdbovP380S5bcRFzcjy/1eaLlNH849SbdEWv4khaO8kCLj+I5D+EpWqnpqhHIME2+PlzLW1sqCIajlnpmSizP3FFM0di0IejOnmmYBE82ETjSCIZprce6OewPUnrm+4G429FEgvsgLof1XvZ4Y5m96ieMm7pA97GIiIiI3JAUDIrIFTFN6xvyy6mJiIjIn5mGQfsXn9Hy/rsQtX5An7RgIdmPP4UrPn4IupORIhwOs3//LsrLSy01p9PJnDkLKC6e8aPhRyAS5L3yj9lZt9e2PrfLz91kknbn/xtnSm6/9C6Dq6XDz4sbz3D6fLttfdWcfB64aQKx3uHzEUG0zY9vVzXRVusSoAAdyV42lTcTin73PUiEeNdxYl3nbB+TO7aI+bc8QnzS8Ak/RUREREQG2/D5rV9EhrVLLfGp5T9FREQuX6SjnYbf/w7faeuUvCMmlpzHniBp8RJNssgltbW1sG3bJrq6Oi21pKRkVqxYQ0ZG1o+ep7LzPC+ffI3mgDUwSoga3Nvcw5wpt+OdfScO5/BZWlIuj2mafH2kjje3lBMMWb+EkJEcyzN3FFE8Ln0IurNnRg0Cx5oIHm8Em+8dGnFu9nf7OVfa8b2fux0tJLgP4HL0Wh7j9niZtfJeJszQa6uIiIiIiIJBEbksmzdvvqqaiIiI/FnPkcM0vPR7jJ4eSy1m3HhG/fwv8ebkDEFnMlKYpsmZMyc5eHAPhmFY6oWFk1i4cCkej/eS54kaUTZWfcVnVZsxbdKXot4g9/tjyF77S1zZhf3Wvwye1s4AL208zckq+ynBm0ryeGDVROJihs/HApFmH75d1RgdAWvRAS0JbjafbeH7EWeUONdJYl1nsYv8sgomsuDWx0hMyRiYpkVERERERpjh8w5AREREROQ6ZYRCNL/9Bp1bbL5M43CQfvs6Mtbfg8OtX8+lb4FAgF27vqam5ryl5nZ7WLRoGYWFP74nZXV3HX88/RbVPXWWmtcwWNfSw9L8xcTe9jAOT0y/9C6DxzRNth+r541NZwnY9VKKRwABAABJREFUTAmmJ8fw9O3FTBs/jKYEIwaBIw0ETzXbTglG4z3saevhQl3H937ucrSR6D6Ay9FteYzL7WHmsvVMmr0ch8M5QJ2LiIiIiIw8+uRBRK7Z/v37AcjJyWHMmDFX/Pjq6moaGhoAmD9/fr/2JiIiMtSC1dXU/+7/EKqzhjDutDRyn32O+KLiIehMRpKGhjp27NiCz2ddJjEjI5Ply9eQnJxyyXP4wn4+rvycbTW7bacExwTCPNRhULDsP+IeM7PfepfB09YV4KXPznDiXJttfcWsUTy0etLwmhJs7Lk4JdgVshYd0BDn4uuKZr4/HxslznWaOFcZdklixqhxLLztcZLSsgeoaxERERGRkWv4vBsQkRHriSeewOFw8Nhjj/Hf/tt/u+LHv/baa7z00ks4HA5OnbL
utyQiIjISmaZJx+avaHn7TcxIxFJPnDOXnCefxpWYOATdyUhhGAbHjh3i2LFDtvWpU2cye/Z8XK6+9/8zTIO99Qf5oPwTeiI+S91pmqxt62VNShHx9z2NMzap3/qXwWGaJjuO1/PGpnL8QevrTVpSDD+9vYgZhcNnOU0zHMV/sJ5QaattPZLgYUdTF/V1we/93OXo+NOUoHV/TafLzYwl65g8dxVOp6YERURERETsKBgUkWHBNG3WDBIRERmhIl1dNL74PL3Hj1lqDq+XrIcfJWX5ShwOux2xRC7q6elh+/ZNNDc3WmqxsbEsXXoT+fmXXq2huruWN0vfo7Kr2raeFYrwUGuAifMewT15me7JEai9O8jLn53hWIV9wLZsxigeXjOR+FjPIHfWt3BdN75d1Zi9YWvR6aDG62BHefMPZgENYp1niHefwW5KMC1nDAtve4yUjFED1LWIiIiIyPVBwaCIiIiISD/qPXGchhd+R7Sry1KLGT2GUc/9Jd5ReUPQmYwk589Xsnv314RC1uUVR43KZ9myVcTFxff5eF/Yx0fnvmB7rf2yoW7DZFV7L6ti8km66zmcyVn92r8MPNM02XWigde/OovPZkowNdHLT28vYuaEzCHozp4RihLYX0eo3H6p03CCh6/rO2j+QWDocnSS6DmIi3bLY5xOF1MX3UbxgrU4nX1PzoqIiIiIyEUKBkVkyAUCAQBiYmKGuBMREZGrZ4TDtL73Du1ffm5bT7v5VjLuux+nZ/hM7cjwE4lEOHBgN2Vlpy01h8PB7NnzmTZtVp+TfYZpsKf+IB+Wf0xPxG97zNSeIHd1m+TOfQjPlOU4HFpycaTp6AnyymelHClvsa0vmZ7LI2snkTCcpgQvdOLbU4Ppt4aYuBxUOmGv7ZTgWeLdp+AHuwwCpGbls+C2x0nLyh+grkVERERErj8KBkVkyH2zr2BaWtoQdyIiInJ1gnV1NPzu3wlWX7DUXMnJ5D7zcxKmzxiCzmQk6ehoY9u2TXR0WKeiEhOTWL58DVlZ2X0+/kJ3DW+eeZ+qbvtlQzNCEda39jJ97DJibrkXR6z2txxpTNNkz6lGXvuyjN6ANWBLSfDy1G1FlEwaRlOCgQj+fbWEKzts68EED5tr2un4wfNx0k2S9wAurNOFDoeT4gU3M3XRrbhc+lhDRERERORK6DdoEbkidXV1fdZ6e3svWf+uSCRCY2Mjn332GUePHsXhcFBUVNRfbYqIiAwK0zTp3PY1zW++hmmz5GPCzFnk/PRZ3MnJQ9CdjBSmaXL27Gn2799NNBq11MeNm8CiRcvxer22j/eFfWw49zk7anfbLBoKnj8tG7rSO4qkW/8zrsyx/fwMZDB09oZ45bMzHD5rPyW4aFoOj66dTGLc8JgSNE2TcFUH/r21mEHrfY3bSZkR5WB58w8fSYyznATPSTCtj0vOyGXhrY+Tnnvp/TVFRERERMSegkERuSKrV6+2XbrKNE0++OADPvjgg6s+95133nkNnYmIiAyuaE8PjS+/SM/hg5aaw+0m88GHSV21ps8lH0UAgsEgu3dv48KFSkvN7XYzf/4SJk6cYnsfXVw29AAfln/S57Kh03oC3NUDufMewz1pie7HEcg0TfadbuKPX5bR4w9b6snxHp68rYg5k4fPPpGGL4xvTw2RauteqwC+BA+bLrTRE/p+8Oekh6SYQ7jMZqwpt4OieauZvuQOXO7hEX6KiIiIiIxECgZF5KqYpvX76HY/u1zr1q1j3bp119KSiIjIoPGdOU3D739LpN265KM3L59Rz/0lMQWjh6AzGUkaGxvYvn0TPl+vpZaWlsGKFWtISUm1feyFrhreLH2Pqu4a23pGKML6ll6mF64k5rZ7cHjj+7N1GSRdvSFe/aKUg6U/nKq7aEFxNo/dPJmkePtp0sFmmiahinYC++swQ9ZpP9Pj5FQ4wjHbKcFzJHhOgGldIjUxNYuFtz1GZl7hAHUuIiIiInLjUDAoIlckLy/P8rO
6ujocDgfx8fGkpKT86DkcDgcxMTGkpqYyadIkbr31VpYsWTIQ7YqIiPQrMxKhdcMHtG38BGy+EJOyag1ZDzyEs48lH0UADMPgxIkjHD160PaLVUVF05k7d4Ht3mm9YR8bKj5jZ92ePpcNXd3ey02xBSTc8Te40hVQj1T7zzTx6ueltlOCSfEenrhlCvOK+t5zcrBFu4P499QQqeuxrffEu9l0vg1fxPjez530khx7GKfRaDMlCJNmr2Tmsrtwe/S6KiIiIiLSHxQMisgV2bx5s+Vn3+wNeO+99/Lf/tt/G+yWREREBkWosZH63/07wSrrko/OxERyf/osiSWzh6AzGUl8vl62b99MY2O9pRYTE8OSJTcxerR1D0DDNNhdv58Pz35CbzRge+5pPQHu6nWSO/9J3BMWatnQEarbF+IPX5Sx/0yTbX1eUTaP3zKZ5OEyJRg1CJ5sJnCsEaI2q4p4nRzxBTlT0fHDCjHOKhK9xzENa/iZkJLBglseJXv0pIFpXERERETkBqVgUET6xbUsIyoiIjKcmaZJ166dNL32B8ygNZCJL55G7rM/w52aNgTdyUhSXV3Fzp1fEwoFLbWcnFEsX76a+PgES+18VzVvnnmP8z21tufNDEVY3+Jj2sRVxMxZj8Mb1++9y+A4WHpxSrDLZw3KEuM8PH7LZBYU5wxBZ/YijT34dtdgdFrvaYCOeDebK1sJGt9/r+DAT2rcERzROkzD+rgJM5cya8U9eLwxA9G2iIiIiMgNTcGgiFyzTZs2AZCYmDjEnYiIiPSvqK+Xpj+8Qve+vdaiy0XmffeTdvOtOJzOwW9ORoxoNMLBg3s5c+akpeZwOJg1ay7Tp5fg/MF91BPuZUPFZ+yq23vpZUPjx5Kw7glcadYl32Vk6PGH+eOXZew91Whbnzs5i8dvnUJKwvCYEjQCEQIH6wiVW/dZBTC8Lg52+ymv7/hBxSTGeYGkmOMYUWuYGJeYyoJbHyV3bFH/Ny0iIiIiIoCCQRHpB/n5+UPdwoh08uRJdu3axfHjxzlx4gS1tRenADZt2kRBQUG/P05ERK6Mv/ws9b/7dyKtrZaaJyeXUc/9JbFjxw1+YzKidHZ2sG3bJtrbrfdRQkIiy5evJjs793s/N0yD3XX7+aD8E3x9LBs6vSfAnb1uchc+jXv8PC0bOoIdLmvm5c9L6eoNWWoJsW4ev2UKC4qzh8XfsWmahCraCRyowwxGrQc4oDnWxdbKViLmD0sB0uKPQaQaw+ah46ctpOSm+/DGaOJVRERERGQgKRgUERki//qv//rttOVgPE5ERC6PGY3S9slHtH70IdgslZ28bAXZDz+KMzZ2CLqTkcI0TcrLS9m/fxeRSMRSHzNmPIsXryAm5vtLJV5cNvRdzvfU2Z43MxTh7lYf0yatxTv7LhweLbU4UvX4w7z2VRl7TtpPCc6elMmTt04hJXF4/B1HOwP4dtcQbey1rUcSPOxs6qKuzjoJGOOqJSnmKEbEb6nFJiQz/+aHySuc3u89i4iIiIiIlYJBEZEhUlJSwuTJk5k+fTozZszgvvvuo6WlZcAeJyIiPy7c2kL9735DoPyspeaMjyfnyadJmjd/CDqTkSQUCrFnz3aqqiosNZfLxfz5i5k0qfh7E2AXlw3dyK66fX0uG7qmrZeVSYUkrnscZ2quzVEyUhw528LLn52hs48pwUfXTmbRtJzhMSUYMQgcbyR4ohkMm7vT7eScw2RfebPl3nUQJCPpJEawEsOajzOmaC5zVt1PTJx1b00RERERERkYCgZFRIbIc889N6iPExGRS+vet5fGV1/C8FsnWuImTyH3Z8/hSc8Ygs5kJGlubmL79k309HRbaqmpaaxYsYbU1PRvf2aYBjvr9rGh/BN8NnuuAczoDnBnwEvOwp/hHjt7WIRFcnV8gTCvf3WWnScabOuzJmTw5G1FpCUNjynBcG03/r01GN3WABOgJ9HDlvNt9ISsa4PGeupJ8h4lGrROGMbEJTJ
v7UMUTJrV7z2LiIiIiMilKRgUERERkRuaEfDT9Nof6dq1w1p0OslYfw/pd9yJw+kc/OZkxDBNkxMnjnLkyH5MmyVoJ08uZt68xbjdf34LVtV1gTfPvMeFPpYNzQpFWN/qY9qUW/GWrMPh9g5Y/zLwjlW08NLGM3T0WEO2uBg3j66dxJLpucMi+DX8Yfz76ghXddjXY10c6g5w9qy17iBEVsppIv5yomHrYwsmzWLumgeJjU/q36ZFREREROSyKBgUkRtGT08PO3fuZO/evZw6dYqqqiq6u7uJiYkhOzubmTNncuedd7J8+fJh8YGMiIgMvEDlOep/9xvCTdY9vjyZWeT+/C+ImzBxCDqTkcTn87Fz5xbq62stNa/Xy+LFKxk7dvy3P+sJ9fJhxafsrt9vu2yo1zBY0+ZjRfIkEu96DGdy9gB2LwPNF4jwxqaz7Dheb1ufOSGDp4bJlKBpmoRKW/EfqoewYT3AAQ0xLrZVtmKdEYTE+BYSXIcI+60Ts96YeOaseYAxU+bod20RERERkSGkYFBEbggvvvgiv/71rwkGrUt0RSIRKisrqays5MMPP2TevHn8wz/8A3l5eUPQqYiIDAbTMGj/fCMtH7wHUevH20mLFpP92JO44uKGoDsZSWprL7Bz51YCgYCllp2dy7Jlq0lMTAS+WTZ0LxvKP+1z2dCZ3QHWBWPJXfQc7jElA9m6DIIT51p5ceMZ2rutf99xMS4eXjOJZTNGDYugLNrmx7e7hmiLz7Yeinezrb6LZp914tHpCJGXVY6v4zRhm70E8wqnMW/tw8QlpvR32yIiIiIicoUUDIrIDaGysvLbUDAnJ4clS5Ywbdo0MjIyCAaDHDlyhA0bNuDz+Thw4ABPPPEEb731FhkZ2ktKROR6E25vp+H3v8V/5rSl5oyNJfuxJ0levGQIOpORJBqNcvjwPk6dOm6pORwOZsyYzcyZc3D+aQnays4LvHnmXap77afGskIR7m4NMLX4Nrwzb9OyoSOcPxjhzc1n2XbU/u972vh0nr69iPTk2EHuzMoMRwkcaSR4uhm7EVbT4+RMJMKRihbbx2emt+GO7MfX0WOpebyxzF71E8ZNXTAswk8REREREVEwKCI3CIfDwbJly3jmmWdYvHjxtx/SfePee+/lueee49lnn6WyspKamhr+8R//kV/96leWc/3yl7/k2LFjV3T9m2++mb/927+9pucgIiLXrufwQRpeegGjt9dSiy2cQO7P/wJvlpZtlEvr6upk+/ZNtLZag5L4+ASWLVtFbu7FlQe6Qz18WP4puxsO2J7rm2VDV6YWkbD+UZxJmQPauwy8k1VtvPjpadq6rFOCsd6LU4LLZw6PKcHwhU58+2oxe202AwQ64t1sqWolELUmhjHeMKOySulqLsU6Qwi5Y4uYf8sjxCel9XPXIiIiIiJyLRQMisgN4W/+5m9ITU295DH5+fn88z//M3fffTcAGzdu5O/+7u+I+8EycvX19VRWVl7R9Zubm6/oeBER6V9GMEjzW6/T+fVWa9HhIH3dXWTcuR6HW78ey6VVVJSxd+9OIhFrkFJQMJYlS1YSGxuLYRrsqN3LRxU/smxoKI7cJf8Rd8H0gW5dBpg/GOHtLeVsPVJnW586Lo2nby8mI2XopwSN3hC+vbVEqrts69FYN3vbezlf32FTNRk9upNQx266mq3Ljrq9MZSsuIfCGUuGRfgpIiIiIiLfp08+ROSG8GOh4DeKiooYP348lZWV+P1+zp8/T1FR0feOefXVVwegQxERGSjB6gvU//bfCdVbP6x3p6WT+/O/IH7ylCHoTEaScDjE3r07OXfurKXmdLqYN28hU6ZMw+FwUNl5/k/LhjbYnis7FGF9W4CpU9fhnXErDpfelo10p6vaeOHTM7R2WfeajPG4eGj1RFaW5A15UGYaJsHTLQSONEDEsB7gdHDBBbsrW7CpkpJikJl8gvZG6/8PAEaNn8a8tQ9qSlBEREREZBjTO1ARkR9
ITEz89r+/2ZdQRERGHtMw6Nj0JS3vvo0ZiVjqiXPnkfPk07gSEoagOxlJWlub2bZtE93d1umqlJRUli9fQ3p6xmUtG7q2rZcV6dNJWP8IzsT0gW5dBlggFOHtrRVsOVRrWy8ak8rTdxSTlRpnWx9MkeZefLtrMNqt4SWAL97N1zUddAStr5dOF0yc0ElH7XbaG62/H3tjE5i96j7GFs0b8vBTREREREQuTcGgiMh3hEIhqqqqvv1zXl7e0DUjIiJXLdLZScOLz+M7cdxSc3i9ZD/6OMlLl+sDbLkk0zQ5ffo4hw7twzCs81MTJ05h/vwluNwuttXsYkP5p/gNu93WYFZ3gDsjCWQv/Wvc+VMHunUZBKUX2vn9J6dp6bQGbV6PkwdXTeSm2fk4h/h1xghFCRyqJ1Taals3PU6O+UOcquiwrecVOIlzHqSl6pxtffTk2cxZfT+x8Un91bKIiIiIiAwgBYMiIt/x8ccf093dDcC0adPIysoa4o5GluTkod8zR/qX0+n49n/T0zVVJf1nIO+t9kOHqf7//Svhzk5LLaFwPBP/5j8Tl5/fr9eU4aE/7yufz8cXX3zxvS8MfcPr9bJmzVqmTJlCWes5Xjj4Gue76m3Pkx2McE9HiLnz7yNp7u1aNnQE+uF9FQhFeHXjGT7Zab/n9LTCDP76gVnkZgztv5umadJb1krr1kqiPuuemABt8W42n2slbJqWWkKCm+LiTi6c3kx72Bp4xyemsGL944yfOqffe79R6PcsGQi6r0REROTHOEzT5h2AiMgNqK2tjTvvvJPW1ovfpv6Xf/kXbr755gG73tatW/m3f/u3b/986tQpwuEwxcXFeL1eAFauXMlf/dVf9cvjRESud0YoRNUrf6D+o09s6/n33s2Yxx7B6fEMcmcy0pw/f56NGzfS29trqY0aNYp169ZBjIM/Hn2frVV7bM/xzbKht+bPI2vNU7i159p14eS5Vv6fNw5T32q9N7weFz9dN5V1S8d/+8H8UAl1+Gn+qgJfVYdtPRrvYWdTF7XdNsvmO2DW7AQC7dtprrUPP4vmLmPZuoeJjVPoICIiIiIy0ujrqiIiXFxC9K//+q+/DQXXrl07oKEgXAwijx49avn56dOnv/3vwsLCfnvcYIhGrcusycjmdDpwOByYpolh6LtE0n/6+97yVVdT/v/9f/CdP2+pedLSmPiffkHKrJmY6LXqenat91U0GmX37l0cOGC/R+C8efNZuGghW6p28ebxD/BF7ZcNLekOsN5MZfy6/0Ts6OI/nVv33UjldDoIhqO8+ulpPtpxDruv1haPS+c/PVjCqMwETNMkGh2afzPNqEHHgTo69tZg2t1zLgeVmOytaMauw5y8RCaMa6Ts8IcYUZu9WVPSWXnPU4yZNB3QfX2t9HuWDATdV9c3l8s51C2IiMh1QBODInLDMwyDX/7yl3z00UcAjBkzhnfeeYeUlJQh7mzkaW/3EYlEh7oN6Ufp6Qm4XE6iUYO2Nut0hMjV6q97yzRNOr/eQvObr2OGrUvlJcwqIeenz+BOSr6WdmWEuJb7qru7i+3bN9PS0mSpxcXFsXTpKgIJEd48/Q41PusxADnBCOs7QkydcTeeqatxOF1X9TxkeGnqCvK/3zpCXYv1nvK4nfxk5QTWzi0Y8inBSEMPvj01GJ02U4BAd6yLrdXt9ESsYZ7H62Lm7Djaqr+ks6XO9vETZy1n5vK78Hi1dHx/0e9ZMhB0X12/3G4XaWnxQ92GiIhcBzQxKCI3NNM0+e///b9/Gwrm5eXx4osvKhQUERkBot3dNLz8Ar1HDltqDo+HrAcfJuWm1TgcQ/thvQx/lZXl7NmznbBNuJyfP5qZ8+exsXoTe88csn18zJ+WDV2RNZf4ux/AGa/fI64H/mCE97adY/OhGtspwYn5KTyzrpjc9KH9kNYIRAgcqCNU0W5f9zo52OWnvN5vWy+cnEZGSgXnjnyNaVpDw8TULBbc8ihZBRP6tW8RERE
RERkaCgZF5IZlmiZ///d/z1tvvQVAbm4uL7/8MgUFBUPcmYiI/Bjf6VPUP/9bop0dlpo3v4BRz/0HYvLzB78xGVHC4TD79++ivLzUUnM6nZTMnk9zUjf/n0P/jN+whoZwcdnQO41Uslf+Ja6ciQPdsgySI+Ut/OGLUtq6rNN3bpeT+1YUcsv80UM6JWiaJqHydgIH6zCD9is21Hsc7LjQRsQm2ExOjWXW3BgunNhARZV1CtbhcDBl7mqmLb4dt8fb3+2LiIiIiMgQUTAoIjck0zT5H//jf/DGG28AkJOTwyuvvMKYMWOGuDMREbkUMxKh5YP3aP98I3YjPKmr15L5wIM49SG2/Ii2tha2bdtEV1enpZaUlEzh7KlsqP+C2sa+lw29uzNM8cx78RTdhMOpPX+uB509QV776iz7z9j/vRfmJfPsumJGZSQMcmffF+0I4NtTQ7TRfpnAYKyLbfWdtASs+wQ6nQ5mzc/BHT3Gye07wGa3wZTMPBbc8ijpufrdWERERETkeqNgUERuON+Egq+//joA2dnZvPLKK4wdO3aIOxMRkUsJNTRQ/7t/J3i+ylJzJSWR8/SzJM4sGfS+ZGQxTZMzZ05y8OAeDMO6bOLoceO4kNLMb8/9wfbxMYbBza0+VuTOJ27V/Thjkwa6ZRkEpmmy/Vg9b20uxxe0hmlet5OHb5nCium5QzslGDEIHGskeLIZDGugZ7ocnAlFOFrZYRP3Qd6YVKbNgDN7X6e3q81SdzpdTF14C0ULbsbl0scFIiIiIiLXI/2mLyI3lB+GgllZWbzyyiuMGzduaBsTEZE+maZJ184dNL3+B8ygdVm/+GnTyX3mZ7hTUge/ORlRAoEAu3Z9TU3NeUvN7faQPCmLj3u/JtBqv2zo7K4A60gne9V/xJVdONDtyiBpaPPx8sYzlFZ32NZnTszkFw+UkJ0WR1ub/YTeYAjXduHfU4vRE7Ktt8c4+fpCO36bwDAu3sOCFfn0NO7g4Jd7bR+fnjOG+bc+SmpmXr/2LSIiIiIiw4uCQRG5ofzP//k/LaHg+PHjh7grERHpS9TXS+MrL9NzYJ+16HKR9ZMHSV17s5ZxlB/V0FDHjh1b8PmswU5CShLlqXVc6D5t+9jcYIR7OqNMKfkJninLcTh0v10PIlGDjXsv8NHOKiJR6/RoQqybh1ZP4q6VE3C7XURtjhkMhi+Mf38d4aoO23rE62RfWy/n661fnACYOnsUY8f0cnTb7wj0dlnqLpeH6UvvYPKcm3A6Xf3ZuoiIiIiIDEMKBkXkhvG//tf/4rXXXgP+HAoWFurb/iIiw5WvrJSG539LpK3VUvPmjiL3ub8kdoyWgZZLMwyDY8cOcezYIdu6me1mi+cwZtRai4ka3NzuY8WoRcSvug9HbOIAdyuDpaKuk5c2nqG22X4CcOHUHB5ZM4nkBC8Ox9AsHWoaJqGyVvyH6iFsDSVNB1zAZO/5NmxuXzKzE1l0Ux41pz9n70b7+z8rfwLzb3mEpLTsfu5eRERERESGKwWDInJD+PWvf80f/nBxryCHw8GTTz7JuXPnOHfu3CUfN3XqVPLytJySiMhgMqNRWj/eQNvHG8C0LomXsuImsh56BGdMzBB0JyNJT08327dvprm50VJzelyUp9TR7O22fezsLj93OrPIWv3XuDIVQF8v/MEI7207x+aDNbZ78GUkx/DErUXMnJAx6L19V6TNj393DdEWn23dF+NiW20H7WFrJOjxupi3bCypiY0c/Px/E/Rbw0+3J4ZZK9YzYeZSTcCKiIiIiNxgFAyKyA3h0KE/f0vaNE3+6Z/+6bIe96tf/Yr77rtvoNoSEZEfCDc3U//8bwhUlFtqzvgEcp56mqS584agMxlpzp8/x+7d2wiFrPuxBeLCnEqtJOyyhiq5wTD3dBkUzX4Y96QlQzYtJv3vSHkLf/iilLYu65KbDgfcPG809ywfT6x36N4mm+EogSMNBE+3YJdcGm4Hx3tDnKrvsH184ZR
M5i7K4vTe9yk9d9L2mNxxxcxb+xAJyen92LmIiIiIiIwUCgZFREREZFjo2rubpj+8guH3W2pxU4rIffY5POn6IFsuLRwOs2XLFo4fP26pmUB1Sgv1iR3wg7wvJmpwS5uP5QVLiV9zLw5v/KD0KwOvszfE61+Vse90k219dHYiP729iPGjkge5s+8LXejEv7cW0xe2rTe7HWyv7SBoWBPDpJRYlt08gai/jG3vvkw4FLAc442Jp2TVfYwrnq/AW0RERETkBqZgUERuCK+++upQtyAiIn2I+v00vfYq3bt3WYsuF5l330vabXfgcGq5O7m0lpYWNm78lNZW676UQVeYs+kN9MZYp8XmdPm505VL5s3/GVf66MFoVQaBaZrsOFbPW1vK6Q1ELHWP28ndy8Zzy/zRuF1D9/pi9ITw7aslUt1lWw97nexs7qbeJjB0Oh2ULBzNlGkJHNn6Bo0XymzPUTBpFnNWP0BcwtCGnyIiIiIiMvQUDIqIiIjIkOk5e5YL//hrws3NlponK5vcn/8lcYWFQ9CZjCSmaVJWdpoDB3YTjVqXB22J66YqrZmo0/jez0cFw9zTDVPmPIZ7wkJNUV1HGtt8vPzZGc5c6LCtF49N48nbppCTNnSToaZhEjzVTOBoI0QMa90JFZEoB893YK1C3ugUlt88gZaag3z12sdEI9Zlc2Pjk5iz+gFGTy7p/ycgIiIiIiIjkoJBERERERl0ZjRK9XvvU/36m5g2QU7ykqVkP/o4zti4IehORpJgMMDu3du4cKHKUos6DKpSm2mJ7/7e0qGxUYNb2vwsG7Oc+DV34/DqPrteRKIGn+29wIadVUSi1jgtIdbNQ6snsXRG7pAGwZHmXny7azDarUt+AnR7nWyt6aDH5jnExnlYsrqQ3FEm+796nta6SttzjJu6gJKV9xITl9CvvYuIiIiIyMimYFBEREREBlW4rY1Tv36e7pOnLDVnXBzZjz9F8sJFQ9CZjDSNjfVs374Zn6/XUuv1BClPbyDg+f7yi3O7/Kzz5JN5y5O40vIGq1UZBBV1nby88Qw1zdb7AWDh1BweWTOJ5ATvIHf2Z0YoSuBgPaEy63K3AFG3k0NdPsrr7QPD4lmjWLB8DFUnt/HFHzdi2HyxIi4xlfk3P8yo8VP7tXcREREREbk+KBgUERERkUHTtW8PTX94BcPns9RiJ0xk1M//Ak9m1hB0JiOJYRgcP36YY8cOYZqmpd6Q2MGFlFZMx59rF5cNdTBl3pO4x8/TsqHXEX8wwvvbzrHpYA3WuwEykmN44tYpzJyQOei9fcM0TcKVHfj312Ha7HcIUOc02VXTTtjmnk7PSmDlrZOI8Xaz/YP/TUdTje05Jsxcyqzl6/HEaApWRERERETsKRgUERERkQEX7e2l6bVX6d67x1p0OEi/cz0Zd67H4XINfnMyovT09LBjx2aamhostbAzyrm0Rjri/hw8x0YNbm33sXzsKmLXrsfhiRnMdmWAHS1v4dUvSmnrClpqDgesnTuae1eMJ9Y7dG99o11B/HtqiNT32NaDHifbG7toDloDQ7fHyfxl45hakkPpgS84vf8rTMO6vGhiSibzb3mE7NGT+r1/ERERERG5vigYFBEREZEB5Tt9ioYXnifS3mapeTMzyXn2OeImTR6CzmSkOX++kt27txEKWUOgzhgfFemNhF1/XlpxbpefO2PGkHnLEzhTcwezVRlgnb0hXv+qjH2nm2zrBVmJPH1HEeNHJQ9yZ39mRg2CJ5oJHGsEwzoFaDodnAmGOFrfazvpOH5yJsvWTiDQU8+m1/+BrrZGyzEOh4PJc1YxfckduD1Dt0SqiIiIiIiMHAoGRURERGRAGOEQLe+9S8eXn9vWM1csY9zPf4bNoI/I90QiEQ4c2E1Z2WlLzcSkJrmNuqR2+NPqoHmBMPf4XEye9zTusbO1bOh1xDRNdhyr560t5fTaLMnpcTtZv3Qcty4Yg9vlHIIOLwo39ODfXYPRxwtcu9v
BtroOfFFrJJiUHMOymydSMC6J4zs/oezQ12ATHSZnjGLBLY+SMWpsf7cvIiIiIiLXMQWDIiIiItLvgtUXqH/+t4RqrftgOePjKfyLn5Nz0wqiUQOCvUPQoYwU7e1tbN++iY6Odkst4ApTkd5AT8zF8CUjFGFtZ5AV027DLLoVh1sTVNeTxjYfL392hjMXOmzrxWPTePK2KeSkxQ9uY99hBCL4D9QRrrDerwARt5N97b2c77EGhk6ng1kLCpi7ZCxtDRV89sq/0tvZajnO4XQydcEtFC+4GZfb0+/PQURERERErm8KBkVERESk35iGQfsXn9H6wXuYEes0T1xRMbnP/IzMiWOGoDsZSUzTpKzsNPv378Kw2VOtNa6byrRmok6DtHCUNR1BVk6+iYx778URl0xbmwLn60UkavD5vgts2FlFOGK9FxJi3Ty4eiLLZowasulQ0zQJlbcROFiPGYxa68AF02BfTQcRm3VDRxWksOLWSSQmOzny9ducO77L9jppOaOZf8ujpGXl9/MzEBERERGRG4WCQRERERHpF+HWFhp+/zv8ZaWWmsPtJvMnD5C65mYczqFb3k9GhmAwwKZtn9NSb91TLeowqEptpiW+m5RIlNVtQRaNXkr8inVk5o/C5XJenESV68K5ui5e2niGmuYe2/qC4mweWTuZlIShmw6Ntgfw7akh2mQfRvvcTrbVd9IesQaGsXFuFq+awJQZOdRXnmTH+2/i7+m0HOd0uZm+5A6mzF2F0+nq9+cgIiIiIiI3DgWDIiIiInJNTNOke/cuml7/A4bfb6nHjB5N7s/+gpj8giHoTkaaE1UnOLBrF07rwCm9niDl6Q14HQHubguydPQy4lbcgTMuefAblQEVCEV4b9s5Nh2osdldDzKSY3ji1inMnJA56L19w4wYBI41EjzRZLcFIIbTwfHeAKc7/LbPoWhmLotXFeIgyN6Nr3L+zAHb62TmF7LglkdJSsvu3ycgIiIiIiI3JAWDIiIiInLVoj09NL76Ej0HbT7QdjhIu/V2Mu6+F6dH+2DJpdV01fHFzo9xNZs4sS4HWZ/YQXtCE2u7giwbvZSE5QoEr1fHKlp49fNSWrus+/A5gDXzCrhvRSGx3qF7Oxuu7cK/pxajJ2Rbb3LCrvoO/IY1EkzPSmDFrZPIzU+muuwwhza/Q9BvnYh0e7zMXLaeiSXLcDg0aS0iIiIiIv1DwaCIiIiIXJXeE8dpePH3RDs7LDV3Rga5zz5H/OQpg9+YjCiNvU18cmID/spekkJx8INQMOyMUpvSwNxgGytSl5Kwch3O2KShaVYGVGdviNe/KmPf6SbbekFWIj+9vYjCvKELhA1fGP/+WsJV1uU+AUJuB7tbeqjzhy01t8fJvGXjmDkvn5C/m50bfk9txTHb8+SMncL8tQ+TkJLRr/2LiIiIiIgoGBQRERGRK2IEg7S8+xYdmzfZ1pOXLCXrkcdxxcUNcmcykjT5WthY9hHlNRcY355Nkmm9X3q8veS7qnkgeyGJs+5QIHidMk2THcfreWtzOb0B6xqyHreT9UvHceuCMbhdQzM5ZxomnUfq6dpxHsLWPSxNB1SEIhyq78G6kyCMm5jBspsnkpgcQ+XJvRz5+n3CQevSy56YOEpW3sv4aQtxOKyTsyIiIiIiItdKwaCIiIiIXLZAVRUNz/+GUEO9peZMTCTniadImjt/CDqTkaLF38bG0g0caD5NQVcmk3pHWY4xMYn1NrJ+3GRSSn6OIzZxCDqVwdDY7uOVz0o5fb7dtl40JpWnbisiJz1+kDv7s2BTDy2bzhFssC73CdDtcrCtoZOuiDUwTEyOYdnaiYyfnElvZyvb3nuBhvNnbM+TP2Emc9c8QFxiSr/2LyIiIiIi8l0KBkVERETkR5nRKG0bP6H1ow8hap2HiZ8+g9yfPos7NXXwm5MRoT3QwcbSDexuOUFMxEtxawHxkRjLcQ5HiGVj0hi/8DEFgtexSNTg830X2LCzirBNoJY
Q6+bB1RNZNmPUkE3OGb4wgSMNdJS3gXWrQKIuB4c7fZztttkL0QGzFhQwb+k43B4HZw9v49iODUTC1j0JY+ISmbP6fkZPnq0pQRERERERGXAKBkVERETkkkJNTTT8/rcEKsotNYfXS9YDD5Fy02p9oC22OoKdfFb6EbuajxEFcnpTGNORgRPrkpB5yR6Wr76fmOT0wW9UBk1lfRcvbTxDdZP9BN6C4mweWTuZlATvIHd2kRmOEjzVTOBEM9iElgB1psGeum6ChjUxzM1PZsWtk8jITqS7vYl9X7xGS+052/OMLZrH7FX3EROnEFxERERERAaHgkERERERsWWaJl3bt9H05muYQetETMy48Yz62XN4c61LQYp0Brv5vHQDO5uPEnGA23AyqT2b9IA1AHE5YOG8RUwomqGA+ToWCEV4f1slXx2sxrSZwEtPjuGJW6Ywa2Lm4DfHxX0EQ+faCRyqx/Rb9zoECLgc7GzqpilkrcfEulm8qpCimbmYpsHp/V9xYtenGFHrsXGJKcxb+xB5hdP7/XmIiIiIiIhcioJBEREREbGIdHXR+MqL9B45bC06HKSvu4uMO9fjcOvXSfm+7lAPX5R+xPamI4QdJjggKRjLxLZcvFHr/ZKemsbylTeTkpI6+M3KoDlW0cKrn5fS2mWz7CawZm4B964oJC5maF5TwvXdBA7UEW0L2NZNp4Mz/hDH2nqxmyGcMiOHxasKiYv30tFcy74vXqO9sdr2XIUzljBrxd14Y+L68RmIiIiIiIhcHn2SIyIiIiLf03PkMI0vv0i0u8tS82Rlk/uz54ibMHEIOpPhrCfcy5elH7Gt8TChPwWCmJDflU5+dxoOrJOAxcUzmDNnAS6Xa/AblkHR1Rvita/K2He6ybZekJXAU7cXMSEvZZA7uyjaEcB/sJ5IjfX1Di5uLdjsdrCztoOAzbKhaZnxrLhlEnljUolGwpzY9Smn9n2Badjsm5iSwfybHyFnzOT+fhoiIiIiIiKXTcGgiIiIiABgBAI0v/U6ndu+tq2nrLiJrAcfxhkbO8idyXDmC/v4qvRjtjYeJPhNIAh4I24mtuWQFLJORcXGxrJkyU0UFIwZ5G5lsJimyY7j9by1uZzegHUpTbfLyd3LxnHrgjG4Xdb9Jgea4Q8TONpIqKz1Yvpno8vjZGd9Bx1ha8jndjuZu3QssxYU4HI5aa0/z74vXqOrtd7mTA4mz1nJjKXrcHti+veJiIiIiIiIXCEFgyIiIiKCv6Kcht//jnBTo6XmSkom56mnSSyZPQSdyXDlj/jZVPoJWxr2E/hOIAiQ7ktgfHs2btM6CThqVD5Ll64iPj5+ELuVwdTY7uOVz0o5fb7dtl40JpWnbisiJ33w7wEzYhA83UzgeBPYBH4AQa+Tfc091PhCtvUxE9JZfvNEklPjiIRDHNnxKWWHtmDabJyYnJ7D/FseJTNvfL8+DxERERERkaulYFBERETkBmZGIrR+vIG2Tz4Cmw+1E0pmk/Pk07iTk4egOxmOApEgW8o+YVP9Xvw/CASdhoOxnZlk91qXhXQ4HMyePZ9p02bhcFiXFZWRLxI1+GJ/NR/uqCQcsVlKM9bNg6smsmzmqEG/B0zTJFzZgf9QPWZv2PaYqNvJiZ4Ap+t9tkOESSmxLFldyPjJmTgcDpqqz7L/yzfo6Wi2HOtwOClesJapC2/F5fb087MRERERERG5egoGRURERG5QoYZ66p//LcGqSkvNERND9kOPkrx8hUIcASAYDfF16Sd8Vb+XXofBD7cMjA95mdiWQ1zEulRiYmISK1asITMze5C6lcFWWd/FSxvPUN3UY1tfUJzNI2snk5LgHeTOINLYg39/HdFWv23ddDqojEQ4WNNBxCYR9HhcLFk1gQXLx9HTGyQc9HN0+wYqju20PV9qdgELbnmUtOyC/nwaIiIiIiIi/ULBoIiIiMgNxjRNOrdupvntNzFD1qXyYidMJPfZ5/BmK8QRCEXDbCv7lC/rdtN
jEwhiQk5vCmM7MnBg3Stu/PiJLFy4DK938AMhGXiBUIQPtlfy5YFqu6Fj0pNjeOKWKcyamDnovUW7ggQO1hO+0NnnMU0u2F3Xgc+w32iwaGYut9w1ldT0eKJRg/oThznw1Zv4uq3LpDpdLqYtvp2iuWtwuqzL6IqIiIiIiAwHCgZFREREbiCRjg4aXvo9vhPHrUWXi4y77ib99nU49KH2DS9sRNhR+ilf1O2iyy4QBNxRJ7NasnCHk6w1t5uFC5dRWDhJU6fXqePnWnnls1JauwKWmgNYM7eAe1cUEhczuG87jUCEwLFGQmdasF0TFOjxONlZ30lbOGpbHz0+jcWrCsnITiQpJZaAr4dtH71G2ZHdtsdnjBrHglseJTkjt7+ehoiIiIiIyIBQMCgiIiJyg+g+eIDGV1/C6LEu9efJzWXUz/6C2HHjh6AzGU4iRoSdZRv5vHYnnX0EggAzuryk+sYSsll7MT09kxUrVpOcnDqwzcqQ6OoN8cams+w51WhbL8hK4Knbi5iQZ91rciCZUYPgmVaCxxoxQ/aBX8jjZH9rLxd6g7b19KwEFq8qZExh+sVzmiYVJw+w46M/4uvpshzvcnuZuexOJpaswOm0TsyKiIiIiIgMNwoGRURERK5zUb+f5tf/QNeuPvbDWr2GzJ88iDPGujec3DiiRpTdZZ/xWe122i8RCE7xRZnmnEFtV4iQzTjW1KkzmD17AS5NnV53TNNk5/EG3tx8lt5AxFJ3u5ysXzqO2xaOwe0avJDMNE3C5zsJHKzH6LEujwxguByc8AU5Ve+zHSKMT/Ayf8U4imbk4nRevPl7Opo5tOVd6itP2Z4zZ8xk5q19mMTUwV8mVURERERE5GopGBQRERG5jvnKSmn4/W+JtLZaaq6UVHKffpaE6TOGoDMZLqJGlL1nP2djzXbaHNE+A8FJgSirU+ZQG4intqXZUo+NjWPp0pvIzx89wB3LUGhq9/HyZ6WcPm/dWw+gaEwqT95WRG56/KD2FWnuxb+/jmizz7ZuOuB81OBAQzdhm00Q3R4nJQtGU7JwNB7vxTA7Gglzev9XnN73JUbUGoB6vLHMWnkPhdMXa5lcEREREREZcRQMioiIiFyHjHCY1g/fp/3zjWDzYXji3HnkPPFTXImJQ9CdDAeGabC/7As2Vn9Ns7PvQLAwEOWO7LnEjp3Dnn17CIetoeCoUfksW7aKuLjBDYVk4EWiBl/sr+bDHZWEI4alHh/j5sHVE1k+c9SghmTR7iCBQw2Eqzr6PKbZBbvru+iNWvsGKJqZy/zl40hM+vO0dN25kxza8g69ndYvUwDkFU5n7poHiU9KvZb2RUREREREhoyCQREREZHrTLC2lobn/51gdbWl5oyLI/vRx0latESTLjcowzQ4ePZLPr2wlSZnFPpY8XFcMModWXOZtHg9Bw4fonznNssxDoeD2bMXMG3aTN1P16HK+i5e2niG6ibrvqQA84uyeXTtJFISB28ZYiMUJXiskeDpFjDsFgWFXo+TXQ2dtPSxz2DBuDSWrC4kI/vPX4zo7Wrj8Jb3qK04ZvuY+KQUltz+EOn503Svi4iIiIjIiKZgUEREROQ6YRoGHZu+pOXdtzEj1uXv4iZPIffZn+PJ0H5YNyLTNDl89ks+ubCFhksEgqNDBusyZzNt5k/o6Oll4xef0tnZYTkuKSmZ5ctXk5mZPbCNy6ALhqK8v/0cXx6oths4Ji0phidunULJxMF7LTENk1BpC4GjjZhB+8Av7HFyoK2Xqp6gbT0tM54lqycwenzat+FeNBqh9OBmTu35nGgkbHmMw+FkxuI1LLr5XlyeGNraevvvSYmIiIiIiAwBBYMiIiIi14FwWxuNLz6P7/Qpa9HlIvPen5B2y204nH2kQXLdMk2To+Wb+PT8ZmqdkT4DwfyQwR2ZJcycdT8Odwxnzpzk4ME9GIZ1GcbCwoksWLAMr9c7wN3LYDt+rpVXPiultStgqTmA1XMLuG9FIXExg/N
W0jRNItVd+A/WY3TZB36Gy8EpX5CT9T7sFg2NT/Ayf8U4imbk4nT+edqv4Xwphza/TXd7k+15M/MKmbPmASZMmYzL5STax5KkIiIiIiIiI4mCQREREZERrmvvHpr++AqGz2epefMLGPWz54gZPWYIOpOhZJomx8s388n5TdRcIhAcFTK4I6OEkpL7cXpiCQQC7Nr+OTU1FyzHut0eFi5cyoQJkwe4exlsXb4Qb3x1lj2nGm3r+VkJ/PS2IibkpwxaT5FWH/79dUQb7af0TAdciBocaOgmZDPa6HY7mbVwNLMXjsbjdX37c193B0e+fp/qssO2542JS2TWirsZN3U+Doe+TCEiIiIiItcXBYMiIiIiI1S0t5emP75K9749tvW0m28l476f4PRoqutGYpompyq28EnlV5x39R0I5oQNbs+YxZxZ9+PyxAFQX1/Ljh1b8PutIXNGRibLl68hOXnwgiEZeKZpsutEA29sOktvwLoEsdvl5K6l47h94RjcrsEJyYzeEP5DDYTPtfd5TKsLdtV30dPHFF/RjFzmrxhHYtKf9z80olHKDn/Nyd0biYTtpg8dTJi1lJlL78QbG3+tT0NERERERGRYUjAoIiIiMgL5Tp+i4YXnibS3WWru9HRyn/4Z8cVTh6AzGUqny7fySeUXVLoi4LI/JitscHv6TOaVPIjLEwuAYRgcPXqA48eP2D5m6tSZzJ49H5erj5PKiNTU7uOVz0s5VWUfwE0ZncpTtxeRmz44IZkZjhI43kTwVDNEbTY3BHweB7sbumgK2e8zWDAulcWrJpCZk/i9nzfVlHNo09t0ttbbPi49dyxz1zxAeo6mq0VERERE5PqmYFBERERkBDHCIVree5eOLz+3rSctXEz2Y4/jik8Y5M5kKJWWb+XTyi8ov0QgmBkxuS19Bgtm/TkQBOju7mL79s20tFj3WYuNjWPp0pvIzx89UK3LEIhEDb7cX82HOyoJRawTd/Exbh5cPZFlM0fhdDhsztC/TMMkdLaNwJEGTJupRYCw28Ghdh/neuz3GUzLjGfxqkLGFKbj+E7Pgd4ujmz7kPOn99s+zhsTz8zld1E4Y7GWDRURERERkRuCgkERERGRESJw4TwNz/+WUF2tpeaMjyfn8adIWrBwCDqToVJe8TWfVHxOmbvvQDA9YnJr2nQWlTyI+09Lhn6jsrKCPXu2EQ6HLY/Lyytg6dKbiIvTkorXk2MVrbyx6SwNbdblYgHmF2Xz6NpJpCTG2Nb7k2maRGq78R+ow+i0D/wMp4Mz/hAn6nuxmxGMS/CwYPk4imaOwun8cyBoGFHKj+7gxM5PCIcCtuceP30Rs5avJyYu0bYuIiIiIiJyPVIwKCIiIjLMmYZB++cbafngPYhaPxqPL55KztM/w5OePgTdyVA4V7GdTyo2csYd6fM3+tSIya1p01gy6yHc3u8HguFwmP37d1FeXmp5nNPpZPbsBUydOuN7k1cystW39vLGpnKOn2u1raclxfDELVMomZQ5KP1E2/z4D9QRqe+xrZtAjWmwv76boGFdVtTtdjJrQQElC0fjjfn+/wla6io5uOktOpqtX6IASM3KZ+6aB8nMG3/Nz0NERERERGSkUTAoIiIiMoyFW5pp+P3v8J8ts9QcbjeZ9z9I6uq1OJxaAu96Z5om5yt38mn5Rk66w33+Jp8cNbkldSpLZz2E12ud9mttbWH79k10dXVaaklJySxfvobMzKz+bl+GSG8gzIc7KtlyqJaoTcDmAFbPLeC+FYXExQz820PDFyZwuIFQuXV/1G+0OWF3YxddNsucAkyZkcOCFeNJTPr+VGPQ38PR7RuoPLHH9nGemDhmLFnHhFlLcTq1X6aIiIiIiNyYFAyKiIiIDEOmadK1ayfNr/8BI2BdBi9m9Bhyf/4XxOTlD0F3MphM06S6chefln/K8UsEgklRk7WpxawoeRivxxoImqbJmTMnOHhwL4ZhDVwKCyexcOFSPB5vfz8FGQJRw2D
bkTre315Jj9+6VCzAuNwkHrt5MhPyUwa8HzMcJXiymcDJZugj8PO7Hexu6qYxaL/PYP7YVJasnkBmzveX/jRNg3PHd3Nsx0eEAvZLpI4tnk/JiruJTUi+ticiIiIiIiIywikYFBERERlmot3dNP7hZXoOHrAWHQ7Sb19Hxvp7cLj1q9z1rqH6EBtOvcsxdwjTbb+sZ0LUZG1qEStnPUKMzYQgQCDgZ+fOr6mtvWCpud0eFi1aRmHhpH7tXYbOqao2Xt90ltrmXtt6SoKX+2+awOLpuTgHeLlY0zAJVbQTOFyP6bcP/CJuB4c7fJR32+8zmJYZz+JVhYwpTLcsb9vWeIGDm96mreG87WOTM0Yxd80DZBdMvLYnIiIiIiIicp3Qp0kiIiIiw0jviWM0vPh7op3WZR7dmZmMevY54iZNHoLOZDBFQr18ufs3fBauJ+JxcHHBx++LN0xWJ09hVckjxHoT+jxXfX0tO3Zswe+3TlJlZGSxfPlqkpMHfmJMBl5ju4+3Npdz+GyLbd3tcnLrgtHcsWjsoCwbGq7rxn+gDqPdOvUMYDgdlAVCHKvvxbp7KsTFe1iwYhxFM0fhdH7//wOhgI9jOz+m4uhOLu5I+H1uTwzTFt/O5Nkrcbq0bKiIiIiIiMg3FAyKiIiIDANGMEjzO2/SuWWzbT156XKyHn4UV1zcIHcmg62q/Gteq/iYWo8DnNZAMM4wWZU8hdWzHiYuJtHmDBcZhsGRIwc4ceKIbX3atJmUlMzHpdBkxPMHI3y8q4ovD1QTiVpDMoC5U7J4cNVEslIH/jUk2hHAf6COSG23bd0Eak2D/fXdBGz2PXS7ncxaUEDJwtF4fxBgmqZB1an9HN32IUF/j+35R0+eTcnKe4lPSr3WpyIiIiIiInLdUTAoIiIiMsQCleeof/63hBsbLDVnYiI5T/yUpLnzhqAzGUwhXwcf7fk/bDXbMDzWQDDGMLkpeTJrZz1C/CUCQYDu7i62b99ES0uzpRYbG8eyZavIyyvot95laBiGyY7j9by37RxdvSHbYwqyEnl07SSKxqYNfD/+MIEjjYTOttoN8QHQ7oLdDV109rHP4JTpOSxYMZ7E5BhLraO5loOb3qal7pztY5PSspmz+gFyx0656ucgIiIiIiJyvVMwKCIiIjJEzGiUtk8/pvXjDRC1LqQXP30muU8/gzsldfCbk0Fjmialpz7h9ZqttHic8IM91BymydK4AtbPeYaE2KQfPV9lZTl79mwnHA5banl5o1m69CbiNHk64pVVd/D6V2c532g/lZcY5+G+lYWsmJlnWYazv5kRg+DpZgLHmyBsH/j53Q72NnVTH7TfZzB/bCqLVxWSlWu9x8NBPyd2b+Ts4W2YpvX8LreHqYtuZcqcVbjcnmt7MiIiIiIiItc5BYMiIiIiQyDU2EjD739L4FyFpebwesl68GFSVq7C4RjYD/RlaPV21vPevt+wx+UDj9NSzzacPFb8IBPz5/zoucLhMPv27aSiosxSczqdzJmzgOLiGbqnRriWTj9vb6lg/5km27rL6WDN3ALWLx1HfOzAhmSmaRI+14H/cD1mrzWIBoi4HBzt9HO2O2A7RJiWEc/iVYWMmZBuuTdN0+TCmYMc2fYBgd4u2/PnT5jJ7FX3kZCcfq1PR0RERERE5IagYFBERERkEJmmSef2r2l+83XMYNBSjxk3nlE/+wu8ublD0J0MFtM0OHzkbd5u3k+X2xoIukyTNUmTuH3u03hdPx7utLa2sH37Jrq6Oi21pKRkVqxYQ0ZGVr/0LkMjGIry6Z7zfLbvAuE+luGcNSGDh9ZMIjc9fsD7iTT04D9QR7TVb1s3HXA2GOZoWy8Rm0QwLt7D/OXjKJ41ynaisbO1nkOb36Gp+qzt+RNSMpiz6n7yCqdd0/MQERERERG50SgYFBERERkkkc5OGl95kd6jR6xFp5P0dXeRse4uHG79inY962ip4K1DL3DUHQabUHCM4eGxWU9RkDX5R89
lmianTx/n0KF9GIY1LJowYTILFizB4/H2S+8y+AzTZO/JRt75uoL2buuXCQDyMhN4eM1Epo/PGPB+op1BAgfrCFfbT/CZQL1psK+xG79hTQRdbiezFhQwe+FovDHW17pwKMipPZ9RemgLps097XS5KZ6/lqL5a3HrvhYREREREbli+tRJREREZBD0HDlM48svEO227gfmyc4h92fPEVc4YQg6k8FiRCPsOvAKH3Sdwm8TCHoMk3UZJaye9TAup+tHz+f3+9m1ayu1tdXWc3k8LFy4jMLCSf3SuwyNirpOXv/qLOfq7EO4hFg3dy8bz02z83G7rPdUfzICEQLHGgmdacF2TVCg0wm7G7tpj1j3TAWYPD2HhSvGkZgca6mZpknN2aMc3voe/p4O28ePGj+VOat+QmKqpl9FRERERESuloJBERERkQFkBAI0vfkaXdu32dZTVt5E1oOP4IyJGeTOZDA11R3nteN/5KzHAJsAZzLxPDbvZ2SmFlzW+erqati5cwt+v3UZx4yMLFasWENSUvI19y1Do707yDtby9l9stG27nQ4WDU7n7uXjycxboD3EYwaBE+3EDjWCGH7JUwDLgf7WrqpDURs63ljUlmyupCs3CTbend7E4c2v0PD+TO29fikNGav+gn5E7RHpoiIiIiIyLVSMCgiIiIyQPwV5TQ8/xvCzc2WmispmZynnyFxZsngNyaDJhoOsGnv83waOE/YYw004g2Te0ctZfHUuy8r8DAMg8OH93Py5FHb+rRpsygpmYfL9eMThzL8hMJRPt93gU/2nCfURwg3bVwaD6+ZRH5W4oD2Ypom4fOdBA7WY/SEbI+Juhwc6/RT2h2wHSJMzYhn8apCxk5It72/I+EQp/d9yZkDX2FErVOGTqeLKfNWM3XhLbg9+vKEiIiIiIhIf1AwKCIiItLPzEiE1o8/pO2Tj8G0flyeUDKbnKeexq2JruvahcrdvFb2PtUewGkNRUqcqTy48DlSEjIv63zd3V1s376JlhZr0BwXF8fSpavIy7u8iUMZXkzT5EBpM29tLqe1K2B7THZaHA+vnsSsiRkDPjUXaerFf6COaLPPtm46oCIY5khbL2GbRDA23sOC5eMonjUKp829D1BbcZzDW96lt6vNtp4zZjJzVj9AcnrOVT8PERERERERsVIwKCIiItKPQvV11D//W4Lnqyw1R0ws2Y88SvLS5VoO7zoWCnTz6Z5/Z1O0CcNmSjAlCg+Ov5WSiWsu+5znzpWzd+92wuGwpZafP5olS24iLi7umvqWoXG+oZvXvyqjrKbTth4X4+KuJeNZO69gwPcRjHYHCRyqJ1xl3wtAg2mwr6mH3qh1otHldjJrfgGzF43GG2P/VrOns5XDW96l7twJ23pcQgolN93L6Mmz9TopIiIiIiIyABQMioiIiPQD0zTp2LKJlrffxLQJb2InTCT3Z8/hzcoegu5ksJwt/ZLXqr6gyeMAm1BjiSeHe5c+R3yM/V5rPxQOh9m3bycVFWWWmtPpZM6chRQXT1eAMgJ19oZ47+sKdhyrt12G0wGsKMnj3uWFJCd4B7QXIxgheKyJ4JkWMOy6gS4n7GnqpjVsXfITYPK0HBauHEdicqxtPRoJc+bAZk7v/YJo1Poa6XA4mTznJqYtvg2P1/4cIiIiIiIicu0UDIqIiIhco3BrK42vvIjvpM0EjMtFxvp7SL99HQ7nwE77yNDx9TTzwb7fsMvsxLSZEsyMOnhsyn1MHrPwss/Z3NzIjh1b6O7ustSSklJYsWINGRmXtwypDB/hiMFXB6r5aFcVgZB9yDZldCqPrJ3EmJzLC5CvlmmYhEpbCBxtxAza9xJ0Odjf0kN1wBrmAeSNSWHJ6glk5fbda33VaQ5tfoeeDusyuABZ+ROYs+YBUjPzrvxJiIiIiIiIyBVRMCgiIiJylUzDoPPrLTS/8zZm0LovmHdUHrk/e47YseMGvzkZFKZpcuzEB7xZv5NOt9MyJeg0TVbHj2f
dvGfxemIu65yGYXDs2CGOHz+MabNH5YQJk1mwYCkej6dfnoMMDtM0OXK2hTc3l9PU4bc9JjMllgdXTWTulKwBnQI1TZNwdReBg3UYXSHbY6JOBye6/ZzpCmBdNBRS0+NYvKqQsZfY89DX3c7hre9Rc/aobT0mPomSFXcztni+pl5FREREREQGiYJBERERkasQamyg8eUX8ZeV2tZTV68l8/4HcXoHdglAGTqd7dW8feB3HHYFwG2dBi0w3Dw2/VHG5E6/7HN2dXWyY8cWWlqaLDWPx8OiRcsZP37iNfUtg6+muYfXvzrL6fPttvUYj4t1i8dy64LReNyuAevDNEzCFzoJHm8k2mb9MgOA6YBzoQhHWnsJ2QTTsfEe5i8bR/GsXFx97HkYjUYoO7SVU3s+IxK2Bo8Oh4OJs5YzfckdeGPjr+1JiYiIiIiIyBVRMCgiIiJyBcxolPYvP6f1w/dt9xJ0paaS+/TPSJh2+WGQjCyGEWXvodd4r/0oPptgxGOY3J46jTWzH8fturxft03T5OzZMxw4sJtIJGKpZ2XlsGzZKpKSkq+5fxk83b4QH+yoZOvhWmwyNgCWTs/lvpUTSEu6vInSq2EaJuFz7QSON2F0Bfs8rgmTvY3d9EStM4Iut5OZ8/OZvXAMMbF939eNF8o4tPltutoabesZo8Yxd80DpGWPvvInIiIiIiIiItdMwaCIiIjIZQrWVNPw0gsEqypt68nLV5D1wEO44hMGuTMZLC2Npbx+9GXOuCNgEwpONGN4dM4z5KSPv+xz+v1+du/eRk3NeUvN4XAwa9Zcpk8vwak9KkeMSNRgy6FaPtxRiS9oDXoBJuQn88iayRTmDVzYa0YNQmfbCJ5sxuixXzIUoMcJu5u6aQnb7zM4eVo2C1aMJyklts9z+Hs6OfL1B1woPWhb98YmMGv5esZPX4jDoXtZRERERERkqCgYFBEREfkRZiRC6ycf0fbpxxC1fnDuycwi56mniS+eOgTdyWCIRkNs2fcCn/RWEHJb90KLM0zuzp7PshkPXNFeaTU1F9i162sCAeuec0lJKSxfvorMzOxr6l0G17GKVt7cfJb6Vp9tPS0phgdumsDCqTkDtq+eGY4SLGsleLIZ028fTAIEXQ4OtfVS5bMPDfNGp7BkzQSycpP6PIdhRDl7ZBsndn1KJGQ3jehgwswlzFh6JzFx+tKEiIiIiIjIUFMwKCIiInIJgcpzNLz0AqHaGmvR4SB1zVoy770fZ8zALQMoQ6um+hCvnX6T824TnNYgZyZJPLTw56Qm5V72OSORCAcO7KGs7JRtffLkYubOXYTH47nqvmVw1bf28samco6fa7Wte91Obls4htsXjiXGOzD7CBrBCKEzrQRPN2MG7af/AAIeJ0faeqnqCWK3wmlqehyLVhUybmLGJcPL5tpzHNz0Fp0tdbb1tJzRzF39IBmjxl7pUxEREREREZEBomBQRERExIYRDNK64X3av/gcu83BvLmjyPnpM8RNnDQE3clgCId8bNz7W74K1RK1mRJMipo8MHo1c4tuv6LztrQ0s2PHZrq6Oi212NhYFi9eyejRClJGit5AmA07qth8qIaoYb+R4ILibB64aSIZl1iK81oY/jDBUy0ES1sgbN0f8NtePU4ONXdT47fujwoQG+dh/vKxFM8ahctmqdxvBHzdHN2+gaqTe23rnpg4Zi67k8IZS7UEroiIiIiIyDCjYFBERETkB3ylZ2h8+UXCTY3WotNJ+u3rSL/zLpwe7+A3J4OivGIbr1d8TIMbsJmYWujK4CeL/4KEuNTLPqdhGJw4cYSjRw9i2oTNBQVjWLx4BXFx8dfQuQyWqGGw7Wg97287R08fQdvY3CQeXTuJSQWpA9KD0RsicKKZ0NlWiNqHkgCdbgcHm7pp7GO/Q5fLwcz5BcxeNIaY2L7fIhqGQcWxnRzf+THhoHX5W4Bx0xYya/l6YuP7Xn5UREREREREho6CQREREZE/ifr9tLz7Np1
bN9vWY0aPIefpZ4kdo2mu61XA38GHe/6d7UYrps2UYEYUHplwF8WFy6/ovN3dXezYsYXmZmvY7Ha7mTdvMZMmFQ3YnnPSv05XtfH6prPUNPfa1lMSvNy3spClM0bhHIC/02hXkOCJJkIV7dDHlCJAmxMONHXTGu57WdFJ07JZuGI8ST8yzdhaf56Dm9+ivbHatp6SmcfcNQ+QlT/h8p6EiIiIiIiIDAkFgyIiIiJA7/FjNL76EpG2NkvN4XaTsf4e0m65DYdbvz5dr06c+pQ3arbQ7nZYpgSdpsnK2ALumv8cMd64yz6naZpUVJSxb98uIhHrVFlmZhbLlq0iOTn1WtuXQdDU7uPNzeUcPttiW3e7HNwyfwzrFo8lLqb/Xyui7X4Cx5sIV3VguzkgF3/chMmh5m46IvbLijqdDiZPz6Fk4WjSMi49oRr093Jsx0ecO74bu4u6vTHMWLKOiSXLcToHZu9EERERERER6T/6ZEtERERuaNGeHprefI3u3bts67ETJpL702fwjsob5M5ksHR31vP2gd9y0NELNlOCeVEnj019iHH5s6/ovIFAgD17tnHhQpWl5nA4mDFjNjNnztEebCOAPxjh491VfLm/mkgfS3bOmZzFg6snkp16+cHx5Yq0+AgcayRS3dXnMSZQa0Q50tJLd9Q+EPR4XUwtGcXM+QUkJsVc8pqmaVB5Yi9Ht28gFLCfjBxTNJeSFfcQl5hy2c9FREREREREhpaCQREREblhdR/YT9MfXyXabf2w3eH1knnfA6SuXoNDwc11yTAM9h97h/ea99PjsgaCbtPk1sRJ3DL3adxuzxWdu7a2ml27tuL3W/dhS0xMYvny1WRl5Vx17zI4DNNk57F63t12jq7ekO0xBVmJPLJ2EsVj0/r12qZpEm3svRgI1vf0fZwDqkIRjrX34usjtIyN8zBzXj7T5uQRG/fj93J7UzUHN71Na32VbT05PYc5qx8gZ8zky3ouIiIiIiIiMnwoGBQREZEbTqSzg6Y/vkrPoYO29fjiqeQ8+TSerKxB7kwGS2trJW8c+j2nXCGwCQULDQ+PzXqK3KwrCz4ikQiHDu3lzJmTtvWJE6cwf/5iPB7vVfUtg6esuoPXvzrL+cZu23pinIf7VhSyYlYeTmf/7SNomiaR2m4CxxuJNvn6PM5wQLk/xMlOP4E+9hlMSo5h1sLRFM3MxeP58WU+QwEfx3d9SsXR7Zim9Zwut5dpi29j8pybcLn0VlJERERERGQk0rs5ERERuWGYpknXrp00v/k6hs+6NJ4zLo6sBx8medkKHI7++6Bfho9oNMK2Q6/wUedpgjaBYIxhcnd6CctKHsZ1hfultba2sGPHZjo7O6znjYlh8eKVjBkz7io7l8HS2hng7a3l7DvdZFt3OR2smVvA+qXjiI+9sknSSzFNk/CFToLHmoi2WSdNvxFxQGlvkDNdAUI24R1AWmY8cxaNYUJxFi7Xj088m6bJ+dP7ObrtQwI++yC0YFIJs2+6l/ik/p2MFBERERERkcGlYFBERERuCOHWVhpffQnfieO29YRZJWQ//hSeNH3ofb2qqz/Ba8f/SKU7CjYTXtPMOB6e9zPSU0df0XkNw+DUqWMcOXIAw7Du7ZaXN5olS1YSHx9/1b3LwAuGony65zyf7btAOGK/R9/MCRk8tHoiozIS+u26pmESrmwncLwJozPY53FhB5zq8lPWGyRinweSm5/M7MVjGDsh/bK/3NDRUsehTW/TXFthW09MzWLO6vsZNa74ss4nIiIiIiIiw5uCQREREbmumYZB59dbaH7nbcxgwFJ3JSaR9ehjJM1fqCnB61QkHOTzfb/jC/95Im7r33Fi1OT+UcuYN239Fd8DPT3d7NixhaamBkvN5XIxd+4ipkyZqntrGDNNkz2nGnlnawXt3fbB3KiMeB5ZM4nphRn9d92oQai8neCJJowe+/0LAYIOON7h45wvRLSPY8ZOSGf2ojGMGp1y2dcPhwKc3P0
ZZYe2YprWINTl8lC88BaK5q3GdYV7bIqIiIiIiMjwpWBQRERErluhxgYaX34Rf1mpbT1pwSKyHnkUd1LyIHcmg6Xy/B7+WPo+9W7TdkpwviOF+5f8BYkJmVd0XtM0OXfuLPv27SQcDlvq6emZLF++mpSU1KttXQbBubouXv+qjIq6Ltt6fIybu5ePZ9XsfNyXsSTn5TDDUYJlbQRPNmH6I30e5wOOdfg47w9hN7/ocMDEqdnMXjiajOzEy76+YUSpOrWPEzs/xd/baXtMXuF0Zq/6CYkp/ReEioiIiIiIyPCgYFBERESuO2Y0SvuXn9P64fuYNqGNKzWVnMefIrFk9hB0J4MhGOhmw77f8HW4EdNmSjAtCg+Pu4Xpk9Ze+bmDAfbs2cH58+ds6zNmlDBz5lxcrivbo1AGT3t3kHe2VrD7pHXSEy6Gbqtm53PP8kIS4/pnWs4IRQmdaSF4qhkz2NfsH3SbJsc6fFQHwtitGOpyOymemcusBQUkp8Zd9vVN06S24jjHd3xEV1uj7TEJyenMXvUT8ifMuOzzioiIiIiIyMiiYFBERESuK8GaahpeeoFgVaVtPXn5CrIeeAhXfP/tESbDy6myTbxe9RltbsfFhOc7HKbJCk8u65f+BbExlz9l9Y36+lp27tyKz9drqSUkJLJs2SpyckZdde8ysELhKJ/vr+aT3VWEwvb7CE4dl8bDayZRkHXl94cdIxAheKqZ4JkW6OOaAO2GwbEOH3VB+ylCb4yL6XPzmTE3n/gE7xX10FRTzrHtG2itr7KtO10uiuatpXjBzbg9V3ZuERERERERGVkUDIqIiMh1wYxEaP3kI9o+/Rii1mkcT2YWOU89TXzx1CHoTgZDb28L7+z7DfvMTrCZEsyNOHhsyk8oHLvwis8djUY4dGg/p08ft61PmDCZ+fOX4PUqVBmOTNPkQGkzb20up7XLutcoQHZaHA+tnkjJxMx+2RPS6A0RONlMqKwVonazfxc1R6Ic7/TTGLIPBOMTvcyaX8DUklF4Y67s7VtHcx3HdnxEfeXJPo/JHVvEnNX3k5SWfUXnFhERERERkZFJwaCIiIiMeIHKczS89AKh2hpr0eEgdc1aMu+9H2dMzOA3J4PiwIkPeKd+J90ua6DjMk1uiRvHrQuexeOOveJzt7e3sn37Zjo62i01rzeGxYuXM3Zs4VX1LQPvfEM3r286S1l1h209LsbFXUvGs2ZuAR73te8jGO0KEjzRRKiiHYy+A8H6UITjXX5aw/bLiqakxVGyaDRTpuXgusK+ejtbOb7rU86fPgC2C5JCalY+s5avJ2dsUb8EoSIiIiIiIjIyKBgUERGREcsIBmnd8D7tX3wOpvXDb2/uKHJ++gxxEycNQXcyGNo7qnnjwPOccPrBJhQcF3Xx2PTHyBs1/YrPbZomp04d5/DhfRiGdQnIUaPyWbr0JuK1LO2w1Nkb4v1tFWw/Wm8bjTmA5bNGce+KCaRc4dKcdqLtAQInGglXdvSVxWECNYEQJ7qDdETsA8HMnETmLB7D+MmZOJ1XFtgF/T2c2vsF5Ue3Y9hMTgMkpGQwY+mdjJkyG4fj2oNQERERERERGVkUDIqIiMiI5CsrpfGlFwg3NVqLTifpt91B+l3rcWq/rOuSYRjsOPwaH7YfJWATnngNk7tSprJy7hO4nFf+K29vbw87d26loaHOUnM6Xcydu4CioumatBqGwhGDDdsqeOPLUgIh+3Bs8uhUHlkzibG5Sdd8vUirj+CxJsIXOvs8xgCqfEFO9QTpjtrvM5g/NpU5i8eQPzb1iu+rcChI2aEtnDmwiUgoaHtMTHwS0xbdSuGMJbhcehsoIiIiIiJyo9I7QhERERlRon4/Le++TefWzbb1mNFjyHn6WWLHjB3kzmSwNDaV8sejr1DhCoNNKFgUjeGROc+QmTH+qs5fWVnO3r07CIVCllpaWgbLl68iNTX9qs4tA8c0TfadbODFT05R39Jre0xGciwPrZ7I3Cl
Z1xzqRhp7CBxrIlLX3ecxBlDeG+RMb5DePgLB8ZMzmbN4NNmjkq+4h2g0wrnjuzi153MCPvs+3J4YiuavYfKcVXi8Wk5ZRERERETkRqdgUEREREaM3hPHaHzlJSJtbZaaw+0m/a67Sb/1dhxu/YpzPYpEwnx54EU+6zlLxGbZ0ISoyX3Z81kw436czitfIjEUCrJ3704qK8tt69OmzaSkZD4ul+uKzy0DxzRNTlW18+GOSspr7af2Yjwu7lg8llvnj8brufq/P9M0idR1EzjWRLTJPnwEiGBytudiIBiw2WfQ6XQweXoOJQtHk5YRfxV9GFSXHub4zk/o6WyxPcbpdDFh1jKmLryF2Phrn4wUERERERGR64M+NRMRGSInT55k165dHD9+nBMnTlBbWwvApk2bKCgo6PNxb775Jrt376a0tJTW1lZ6e3tJSUlhxowZPPzww6xatWqwnoLIoIn29ND85ut07d5pW4+dMJGcp54hJi9vkDuTwXK+5jB/PP0mtS7DdkpwDok8sPA5kpNzr+r8DQ117NixBZ/PGvbExyewbNkqcnN1fw0npmlysqqND3dUUlHb1edxS6bn8pOVE0hLuvppOdM0CV/oIni8kWirv8/jwqbJmZ4gZb1BQjb7nro9TqbNzmPmvAISk6+8H9M0aTx/hqM7PqKjqaaPoxyMLZ7HjCV3kJCSccXXEBERERERkeubgkERkSHyr//6r2zatOmKH/fiiy9SXV3N5MmTmTNnDrGxsVRXV7N161a2bt3KM888w3/5L/9lADoWGRrdB/fT9MdXiXZZP/h3eL1k3nc/qavX4riKCTEZ/oIhHx/v+y1bg7UYNlOCqRGTh0avYmbxHVd1/mg0ypEjBzh58qhtffz4iSxcuBSvlmAcNi43EJyQl8wjaydTmHflS3R+ey3DJFzVQeB4E0ZHoM/jgqbJ6e4AZ31BItY8kNg4DzPn5TNtTh6xcZ6r6qW14TzHtn9EU3VZn8eMGj+NmcvuJDUr/6quISIiIiIiItc/BYMiIkOkpKSEyZMnM336dGbMmMF9991HS4v9cmDf9atf/YrJkyeTkJDwvZ8fOHCAn//857zwwgvcdtttzJo1a6BaFxkUkc4Oml77Az0HD9jW44unkv3kT/FmZQ9yZzJYzpzbxusVH9PiAn6wH5zDNFnqyuCeZX9JXHzqVZ2/o6ON7du30N7eaql5PF4WLVrG+PETr+rc0v9M0+Rk5Z8Cwbq+A8Hs9Hgeu2UK08emXvU+gmbUIFTRTvB4E0aPda/Jb/gMg1PdAc75QkRt6onJMZQsHE3RzFw8V7mEaXd7E8d3fkx12ZE+j8kYNY6Zy9eTXaD7VURERERERC5NwaCIyBB57rnnrupxs2fPtv35vHnzuP3223n33XfZvXu3gkEZsUzTpHv3LpreeA3DZllHZ1wcWQ88TPLyFVf9ob8Mbz5/B+/t+w27o61gk6VkR+DRCXcxacKKqzq/aZqcOXOCgwf3YRjWOCc3N4+lS28iISHxqs4v/etyA8HMlFgeunkKaxeMwQG0tfW9B2Cf14oYhMpaCZxsxvSF+zyuOxrlVHeQKn8Iw6aelhnPnEVjmFCchct1ddPM/p5OTu75jHPHd2OadleB5PQcZiy7i/wJM/R6KCIiIiIiIpdFwaCIyHXE7b74su71eoe4E5GrE25tpfHVl/CdOG5bT5hVQvbjT+FJSxvkzmSwHD7zKW9Vb6HLZtlQl2myxpvP7cuew+uNv6rz+3y97Ny5lfr6WkvN6XQye/YCpk5VyDIcmKbJiT8Fgud+JBC8c8k4lkzPJTsrCZfLSTRqH6T1ea1QlOCZFoKnmjGDdrN/F3WEo5zqCXAhEMZmxVBy85OZvXgMYyekX/U9FAr6ObN/E2WHthCN2IeTcYkpTF9yB+OmLsDpvLpJRBEREREREbkxKRgUkRtGT08PO3fuZO/evZw6dYqqqiq6u7uJiYkhOzubmTNncuedd7J8+fIR+YHw6dO
n2bhxIy6Xi+XLlw91OyJXxDQMOr/eSvM7b2EGrft4uRKTyHrkMZIWLByR//+UH9fZ1cAbB37LMXrAJhQcE3Hw6NSHGV0w56qvcf78OXbv3k4oFLTUUlPTWLZsNenpGVd9fukfVxII3rVkHIun5+K+yqk8IxAheKqZ4JkWCPcdJraGIpzsCVAbjNjWx05IZ/aiMYwanXJVfQBEI2HOHtnO6X1fEAr4bI/xxsRTvOBmJpYsx+3Rl4BERERERETkyikYFJEbwosvvsivf/1rgkHrh8GRSITKykoqKyv58MMPmTdvHv/wD/9AXl7eEHR6+d599132799POBymtraWI0eO4Ha7+fu//3smTZo01O2JXLZQYwONL7+Iv6zUtp60YCFZjzyGOyl5kDuTwWAYBjuOvsUHzfvxO62BoMcwWZc4idXzn8bl8lzVNUKhEPv376Kiosy2Xlw8gzlz5uNy6VfjoWSaJsfPXQwEK+sHOBD0hQmebCJY1gaRvgPBxmCEUz0BGkLWQNDhgInF2cxeNJqM7KtfdtYwDKpO7ePk7o34utttj3G5PEyes5Ki+Wvxxl7dtKyIiIiIiIgIKBgUkRtEZWXlt6FgTk4OS5YsYdq0aWRkZBAMBjly5AgbNmzA5/Nx4MABnnjiCd566y0yMobv5MihQ4d4//33v/1zXFwc//W//ld+8pOfDGFXIpfPjEZp/+oLWj94DzNsXS7PlZJKzhNPkVhiv6+mjHx1jeX8n63/SikBsAkFJ0fcPFryFFnZU676Go2NDezcuYWenm5LLT4+gSVLVpKXV3DV55drN5iBYLQ7SPBEM6HyNjDsFgO9qC4Q5mRPgJawdVlRl9tJ0cxcShYUkJwad1V9wMXnXXfuBMd2fExXa73tMQ6Hk/HTFzFt0W3EJ6Ve9bVEREREREREvuEwTbPvd8QiIteJ//7f/zs1NTU888wzLF68GKfT+oFibW0tzz77LJWVlQDcd999/OpXv7Ic98tf/pJjx45d0fVvvvlm/vZv//aSxyxdupSWlhY2bdpEQcHlf0jt8/k4f/48r776Ku+++y7Lly/nX/7lX4iNjb2iHvtDe7uPSKTvvZlk5ElPT/h2v662tt5+O2+wppqGl14gWFVpW09etoKsBx/CFZ/Qb9eU4SNqRNh+4jU+aDpO2CYQjI+a3JNewuLZD1/1/mnRaJRjxw5y4sRR7H7dHTu2kEWLlhETM/ivlXLR5QaCWakX9xBcPO3HA8G+XrOiHQECx5sIV7Zjuzngn/qpDoQ51ROk3ebfMm+Mi+lz8pkxL5/4hGtbxrO5toJj2z+ipe5cn8cUTJrFjKV3kpyec03Xkms3UP8WiujekoGg++r65Xa7SEvTygEiInLtNDEoIjeEv/mbvyE1NfWSx+Tn5/PP//zP3H333QBs3LiRv/u7vyMu7vvTAPX19d+Gh5erubn5io6/EvHx8RQXF/N//V//Fw6Hg3feeYcXX3yR//Af/sOAXVPkapmRCK2ffETbpx9D1PrBuzszk5wnnyZh6rQh6E4Gw5nKHbxX/gm1rqjtlGCJEceD839OSurVT/F1dnawfftm2tpaLDWPx8PChcsYP36i9qscIhcDwdY/BYLWSc5vXEkg2JdIq4/g8SbC5zv7PMYwTc77w5zqDdBls6xofIKXmQsKmFYyCm/Mtb196mip4/iOj6g7d7LPY7ILJjJz+XoyRo27pmuJiIiIiIiI2FEwKCI3hB8LBb9RVFTE+PHjqaysxO/3c/78eYqKir53zKuvvjoAHfaPe+65h3feeYdNmzYpGJRhJ1B5joaXXiBUW2MtOhykrl5L5r0/wTkE064y8Bpaynn/2B85QS/YDAEmR0weGrWUWdPvvurAzjRNSktPcfDgHqI2wXN2di7Llq0iMTHpqs4v1+ZKAsG7loxn0bScqw4EA7Vd9Ow8T6S27+tETZNzvhCne4P0Rq2BYEpaHCULRzN5eg5u99X18Y3erjZO7PqUqlP76WtkMTU
rn5nL7iJ3XLFCaxERERERERkwCgZFRH4gMTHx2//+Zl/CkSI9PR2Atra2Ie5E5M+MUIjWD9+j/YvPwWZJR09uLrlPPUvcpElD0J0MtF5fB58cepEdwTqifYQdi0nhviV/SXzi1e/r6vf72LXra2prqy01p9PJrFnzmDZtpu1S0jKwTNPkWMXFQLCqoe+gLjs1jjuXjLvqQNA0TXznO+jcV4O/pu+lSSOmSXlvkDO9Qfw2+wxm5iQyZ/EYxk/OxGkz1Xolgv4eTu39gvKj2zFswmqAhJQMZixZx5iiOTgcuj9FRERERERkYCkYFBH5jlAoRFVV1bd/zsvLG7pmrsLevXsBGDt27BB3InKRr6yUxpdeINzUaC06naTfdgfpd63H6bm2/bpk+IlEw2w98jqftx/H53SATSiYHYFnpt7J6IIV13StCxeq2L17G8FgwFJLSUll2bLVZGRkXtM15MpdSSB419KLgaDrKoJbIxQlXNFGsLSVzs6+v9ATMkzKeoOU9gYJ2XxJIX9sKrMXjaZgXNo1T+xFwkFKD26l9MAmwiHrfQkQE5fI1EW3MmHmUlwuvS0TERERERGRwaF3oCIi3/Hxxx/T3X3xw8tp06aRlZU1xB1934kTJ6ivr2fVqlW43d9/Cd+yZQv//M//DMADDzwwBN2J/JkR8NP8ztt0bt1sW48ZPYacnz5D7Nhxg9uYDDjTNDly9ks+OL+JFpdpu49gQtRgfcZU7lz1HE5XDG1tvVd1rXA4zP79uygvL7WtFxVNY86chZbXSxlYpmlytKKVDQMcCEZafIRKWwlVtkPUfnlOgEDUoLQ3yFlfkLDNYeMnZzJ70Why8pKvuIcfMqJRzp3Yxck9nxPotZ9adHtimDJvNVPmrsLj1dLJIiIiIiIiMrj0KYmIyJ+0tbXxj//4j9/+eaD36Nu6dSv/9m//9u2fOzs7AfjFL36B13txemrlypX81V/91bfHNDQ08Itf/ILk5GSmTZtGRkYG3d3dVFZWcuHCBQCeeeYZ7rjjjgHtvS/JyfqA83rzzTJ6TqeD9PSEy3pMx6HDVP37bwi1tFpqDrebggfvZ9Q9d+NUWHPdKa8+yqt7X6GMgO0+gi7TZJU3mwfX/gdS0nNxOByYpnnZ99Z31dXV8fnnn3372vld8fHx3HLLrYwbN+4qnoVcLdM0OXC6iTe+LKWi1vr38o1RGQk8uHYSK0rycV3hkqFGOEpPaQvdxxoINl46UPZFDU73BKjwhfjhIp5Ol4MZc/JZtLKQzOxE28dfCdMwqDh5gH1fvU9na5PtMU6Xi2kLbmLOyjuJT7z2EFIGz9X8WyhyOXRvyUDQfSUiIiI/Rp/IiYhwcQnRv/7rv6a19WKQsXbtWm6++eYBvWZbWxtHjx61/Pz06dPf/ndhYeH3ajNmzOAXv/gF+/bto7KykoMHD+J0OsnOzubuu+/mwQcfZN68eQPa96Vc6Qe8MnI4HA5crksvrRfu7qby9y/RvGWrbT1pymQm/uI/Ej9m9AB0KEOptaOeV7f+H3YHmjD7WIKxxIjlySU/pWDsrO/9/HLure+KRqPs2bOHvXv3YtosBzlp0iTWrl1LfHz8lT0JuWqmabL/VCOvf3GG8ppLBIKZCTx882RWzi644n8vgi0+Oo/W032qCSNov1ffN7oiUc70BKn0hzB+UPN4XcxdPJZFK8aTnBp3RT30pbr8JLs+e4fm2ir7AxwOJs9ayMKb7yUlPbtfrilD40pfr0Qul+4tGQi6r0RERKQvDtPuExURkRuIYRj88pe/5KOPPgJgzJgxvPPOO6SkpAxxZyNPNPrDj2BlpHM6Hd9OdRlG378ytO7eQ9XvnifcYQ0FnDExjH70EXLvuA2Hy2aMTEasQMjPB9ufZ2PraUI2S4YCjI44eLx4HTNm3va9fdsu9976rvb2dj77bCONjdY9Kz0eDzfdtIqpU6de8/5wcnlM02T/6Ube+LKMc5eYEMzLTOCBNVc+IWhGDXrL2+g
61kCgxn5Zzm8YpklNIMxZX4imUMRSj4v3MH/ZOOYtGUtcfP/sadpcW8WeL96lpuJUn8eMmTyDhTf/hMxR+kLESHY1r1cil0P3lgwE3VfXN30ZV0RE+oOCQRG5oZmmyd/93d/x1ltvAZCXl8err75KQUHBEHc2MrW3+4hELj3JISNLenoCLpeTaNSw3Qcu0tlB02t/oOfgAdvHxxUVk/PU03izNCVzPTFMg72nPuKjup109pH1pkQM7kqfxYLZD+FyWYOYH7u3vss0Tc6ePcOBA7uJRKyhT1ZWDsuWrSIpScszDgbTNDla3sqHOyo539j3HoI5aRf3EFw49cr2EIz2hAiVtRI624YZsP59f1dvxKDcH+ScL0TA5sPPxOQYShaMpmhmLh5v/3wxobu9ieM7P6G67HCfx6TnjmXW8vVkj57UL9eUoXUlr1ciV0L3lgwE3VfXL7fbRVqaVsUQEZFrp6VEReSGZZomf//3f/9tKJibm8vLL7+sUFDkMpimSffuXTS98RqGz/qBgzMujqwHHiZ5+QpNb11nyqr38W7pB9Q4I7b7CHoNkzXePG5e9DQx8anXfD2/38/u3V9TU3PBUnM4HMyaNZfp00twXkHwJFfHNE2OlLewYUdVvweCpmESqe0mWNpCpLbvc3/TR10wQrkvSH0wgt23HDNzEpk5v4CJxVn99s16f28XJ/d8xrnjuzAN+wn5pLRsZi67i/yJM/XaJyIiIiIiIsOSgkERuSGZpsn/+B//gzfeeAOAnJwcXnnlFcaMGTPEnYkMf+HWVhpffQnfieO29YSZs8h+/Ck86emD3JkMpKaOC7x/+BWOmV1gk7M4TJMFZiJ3lTxGWubEfrlmTc15du3aRiDgt9SSk1NYtmwVmZmaRh1o3wSCH+6o5EJjT5/H5aTFsX7peBZMzb7sQNDwhwmdbSNY1orZG77ksQHDoMIXosIXorePpasnFmUxf9l4xk/MoL3Dd1k9/JhQ0E/pgU2UHtxKNBKyPSYuMYVpi29n/LSFOJ1aMllERERERESGLwWDInLD+SYUfP311wHIzs7mlVdeYezYsUPcmcjwZhoGnV9vpfmdtzCDAUvdmZhI9iOPk7RgoSZlriO9wS42HnqZbb4LRPv4e50YdnLfpLsYW7i0X64ZDoc5eHAPZWWnbeuTJxczd+4iPB5Pv1xP7JmmyZGzLXy480cCwfR41i8Zd9mBoGmaRBp6CZW2EL7Qie3I33c0/mk6sCYQxi4OjIl1UzQzl+lz8hhXmPnt8mnXKhoJU350O6f2fkkoYL8UmycmjuIFNzOpZAVuT//sXSgiIiIiIiIykBQMisgN5YehYFZWFq+88grjxo0b2sZEhjl/XT01//tf8ZeV2taTFiwk6+HHcCdrj7frRSQaYduJt9nYfBifE7AJBbPCBvfkLGTmjPtwuvpnSqqlpYnt27fQ3d1pqcXGxrFkyUoKCjTdPZCuKBBcOo6FxTk4nT/+ZQAjGCFU0U6otBWjK3jJY8OmyTlfiHJfkK6IfciXmZ3I9Ll5TJyajcfTf1N6hmFw/vR+Tuz6FF93u+0xLpeHSbNXULzgZryx2utHRERERERERg4FgyJyQ/mf//N/WkLB8ePHD3FXIsOXGY1Su+Ejzr/2OmbIusyfKyWVnCeeIrFk9hB0JwPBNE2OV27j/XMbaXIatsuGxkcNbo0fz01Lf4o7JqFfrmsYBidOHOHo0YOYpnWErKBgDIsXryQuLq5fridWpmly+GwLG3ZUcqGpfwJB0zSJtvgJlbYQquqA6KXHA9vCEc72hjgfCNke6nQ6mFCUxfQ5eeTkJ/frdLJpmtSdO8HxHR/T2Vpve4zD4WD8tEVMW3wb8Ulp/XZtERERERERkcGiYFBEbhj/63/9L1577TXgz6FgYWHhEHclMnwF62o58X+/QG95hW09edkKsh58CFd8/wRDMvRqmkt59/jrlOGzDQRdpslSRyrr5v+UxNT8frtud3cXO3Zsobm50VJ
zu93Mm7eYSZOKtETtALncQDD3T4HggssJBMNRQpUdhEpbibZZ94j8rqhpUuUPUe4L0RaO2h6TkORlakkeU2eNIj6x/5fsbK49x7HtG2ipO9fnMQUTZzFj2Z0kp+f0+/VFREREREREBouCQRG5Ifz617/mD3/4A3Dx2/5PPvkk586d49y5vj8ABJg6dSp5eXmD0aLIsGEaBu1ffEbrB+9hRiKWujszk5wnnyZh6rQh6E4GQqevlQ2HXmJvsAGzj/BtetjNvVPvJ3f0nH67rmmanDx5gi1bthKJWCdSMzOzWLZsNcnJKf12TfmzgQgEo+0BgmUthCraIXzpff66IlHO9gap9IcJ20yJAuSNSWH6nHzGTcrA5frx/QuvVGdLPcd2fkRdxYk+j8kqmMjMZXeRmacVBkRERERERGTkUzAoIjeEQ4cOffvfpmnyT//0T5f1uF/96lfcd999A9WWyLATamyg4YXnCVSUW4sOB6mr15J5709wxsYOfnPS74KRIJuOvs6X7ScJOR22+wjmh03uLVhJ0dR1/Tqx5/f72bTpK8rLrfeaw+FgxozZzJw5B6ez/8OgG51hmhwua2HDzkqqLxEIjsqI566l41hQdOlA0IwahM93EixtJdrUe+lrA9V/mg5sClm/eADg9jiZMj2XaXPyyMgamInk3q42TuzeyPlT+2yXrgVIycxj1vL15I4r1rSqiIiIiIiIXDcUDIqIiAimYdCxeRMt772NGQpZ6rH5eWQ9/jRxkyYNQXfS30zTZF/ZZ2yo3kqH0wSb0Cc5YrAuuZjFcx/D5enfILi2tprdu7/G5/NZaklJySxbtoqsLC3X2N/6OxCMdgcJlbUSKm/HDNiHfN/wGQZne4Kc84cIGH0EcelxzJiTz+TpOcTEDszblKC/l1P7vqD8yHaMqH3PCcnpTF+yjrHFc3E4FEyLiIiIiIjI9UXBoIjcEF599dWhbkFk2Ao3N9Pw4vP4y0qtRYeDvLvvouChB+novfQH/zIyVNQd4d3T73DeEbLdR9BjmKxyZ3HL4qeJS8rq12uHwyEOHNjD2bNnbOsTJxYxf/5iPB5Pv173RncxEGzmwx1V1DRfWyBoGiaRmi6CZa1EarsveV0TqAuGKe8NUh+MYBcHOhwwdmIG0+fkUTAubcAm8yLhIGWHvubM/q8IhwK2x8TEJTB14W1MmLkEl1v3oIiIiIiIiFyfFAyKiIjcoEzTpPPrLTS//SZmMGipe7KymfT/+gVp06cRjRqgYHBEa+mq4/3DL3Mk2g422YvDNJkbjWX9zEfJyC3u9+s3NNSxa9fX9PRYw6SYmFgWL17BmDHj+v26N7IrCQTXLx3P/KLsPgNBwxcmdLaN4NlWzF7rfpDfFcLkbE+Qcl8QX9R+OjA2zk3xrFFMm51HUsrALU1sRKOcO7Gbk3s+I9DbZXuM2+NlytzVTJm3Go9XyySLiIiIiIjI9U3BoIiIyA0o3NZK40sv4Dt10raeunoNmT95kORR6YPcmfQ3X6iXzw6/ytc9FUT6mMYqDMF9hbczbtKqfp/YikQiHD68j9OnT9jWx40bx/z5y4iLi+/X697IDNPkUGkzG3ZWUtPc955/PxYImqZJpKGHUGkr4Qud2I78fUdzOEppT4DaQBijj2OycpOYPjePicXZuN0Dt0ynaZpcKD3E8Z2f0NPRbHuM0+micOZSpi28hdiE5AHrRURERERERGQ4UTAoIiJyAzFNk66dO2h+8zUMv99Sd2f8/9m77+jI7vr+/687Xb33rt1VWW1fbV9X1tgmDi6Ekm+AL5iEb0jgnITkBzlJvoFAciAkBJ98004IzU6AmGIMBlxxwbv29r4raVe9t1Ev0+79/aG1XDSj1a41KjvPxzkkM/P53HvfV/pYmp2XPp9PhnI/8jHFV69fhuqwmEJmSC9feFw/73lVEzbNrNn4FhkBU/dmbtXWLe+Tzb74Syf29/fp4MHnNTo6MqfN6XTqlltuUU3NBg0Nzd1rENf
uWgLBe/eXqbYyfCBo+oLyXx6Sv2FQ5ujc2cRvFJTUODEzO3A0GD4OtNkNra3O1oZt+crJj34A1375gg49+QP1d7ZE7FNctV0b9/6GElMzo14PAAAAAAArCcEgAAAxIjg8rN6Hv6WJM6fDtqfcfIsy3/sB2ePilrgyLLZzbYf044Yn1GsLht1H0BMydae7ULft/qic8SmLfv1QKKTTp4/r/PnTsqy508xycvL0rnfdpfT09JllavG2vBYIPn6wWZ3zBIL5mQl6977SsIGgZVkKDUzKVz+oQMuwFGEJ0NcMm6bqRqfVNuVXKEKfxGS3arbmq3pzruLiXdd4V9euv7NJBx9/Wh2NFyL2yS2p0qabflNp2UVRrwcAAAAAgJWIYBAAgBucZVkaO/Kq+v77v2ROzg0NHGlpyvnfH1XChk3LUB0WU5e3WT8684jqzPGwgaDNsrTXTNQ9Wz+spMyyqNTg9Q7o5Zdf0PCwd+71bXZt27ZD1dUblZKSGJXrx5JrDgSrsmV7y8xRKxCSv3lY/voBhbzT81/PkFqm/GoY82koGCkOlApLU7VhW4FK1mZE3LNwsViWqa6m86o7+pwGupoi9kvPLdGm/b+pnOKKqNYDAAAAAMBKRzAIAMANLDg6qr7/+o7GTxwP2568Z5+yfvt/yR6fsMSVYTGNTg3riZPf0StTHTIj7BG43m/TfZX3qqBsT1RqME1T586d0unTx8POEszIyNK+fbcqNTUtKtePJYsRCIaGpuSrH5S/aUgKzD9rc0KWLo5MqWUqoECY760kOV12VW3MVc22fKVlRH+/SDMUUmv9cdUdfU6jg90R+yWlZWvjvntUuG7zou+fCQAAAADAakQwCADADWrs+FH1PfKwQuNjc9rsycnK+fBHlbhl6zJUhsXiDwX0/NlH9dTAafki7COY5zd1f94erd94vwxbmGmEi2B4eEgHD76gwcH+OW02m02bN29XTc1m2aJ0/VhhWpaOX9lDcL5AsCAzQe/eX6btlVlvCgStkKlA64h89QMK9c2/r6MlqTMQVN3olPr9kWcHpmXGa8O2fFXU5Mjljv4/LQJ+n5rOHVLD8Rc0OTYUsV98UorW77pLZRt2y2azR70uAAAAAABWC4JBAABuMKHxcfV99780duTVsO1JO3cp+399SPZElnJcrSzL0vHGX+nxlmfktZlhlw1NCpr6jYQ12rvvw7K7ozMj1DRN1dWd04kTR2Wac8OjtLR07dt3m9LTM6Jy/VgxGwi+3KzOgWsPBEOjPvkbBuW/7JXlixzySZLPJtWNTKtx0iefGX52oGFIZRWZ2rAtX/nFqUsyE883Na5LJ1/SpVMvyT8dOdRMTEnXlv13qmr7TRobD0a9LgAAAAAAVhuCQQAAbiDjp06q95FvKzQyMqfNnpik7A9+SEm1O5ehMiyWpr4L+tG576tF02EDQadp6RYjTXft/IjiUvOjVsfY2KgOHnxBfX09c9oMw9CGDVu0adM22e3M1rpewZCpo3V9+sUrrfMHglkJundfmba9IRC0TEuBjlH56wcV7Jo7a/iNLEkDlqnzQ5Pq9kUO0+LinVq/JU/rt+QrMdl9Xfd0rSZGBlV3/Hk1n3tFoWAgYr/kjDxV7XiHtuy5SS6XS6GQKYlgEAAAAACAtyIYBADgBhCanFD/97+r0UMHw7Ynbt2u7A9+WI6UlCWuDItlcLxPPzn5HZ0IzF2u8zXb/A7du+F9yizcErU6LMtSQ8NFHT/+qoLBucFLcnKK9u27TVlZ2VGr4UY37Q/q16e79fTRNg2O+iL2CxcImhMB+S8NynfJK2sycpAmSQGbocYJn+pHpzQZYXagJOUUJGvDtnytqcyS3bE0y8EO93eq7uhzaqs/IcuKvAdiZn65qna8Q/nlNTIMm+x2/nkDAAAAAMB8+JczAACr3MS5s+r9zrcUHPLOabPFxyv7f31QSbv2LMlyf1h8U4FpPXX6v/X8SJ2CEb6HpX5LDxTdrvL1d0X1+zw
xMa5Dh15Ud3dn2Pbq6o3aunWHHA7eYl6PkQm/njverudPdGpiOvJst7cGgpZlKdA1Jn/9oALtIzNTAOcxbJPODU6oYzoQsavdYdO69dnasC1fWblJ139T18CyLPV3XFbd0efU3XJh3r755RtUteOAsgrKl6Q2AAAAAABuFHxqAwDAKmVOT6n/0f/RyEsvhG1P2LhJOf/7o3Kkpi1tYVgUpmXqYN3P9UTnyxq3WTMbu71FesDUb6bWqPam/yWbM3pLO1qWpaamSzpy5KACgbmz0BITk7R37y3KzY3e0qU3sh7vpJ460qaDZ3sUDEWeHVeYlaB3vyEQNKeDmm70yl8/KHPMP+81QjZDbf6AznsnNTbPNZJSPKrZlq/qTbnyxDmv+56uhWWZ6mw8p7qjz2qwuyViP8NmU0lVrapq36GUzLwlqQ0AAAAAgBsNwSAAAKvQZN1F9Xz7GwoODMxps3k8yvrA/1LyvpuYJbhKXew8rh/V/VjdRiDsPoKekKk7HDm6bd9H5U7MjGotU1OTeuWVX6ujozVse0VFtbZv3yWn0xXVOm5EjZ0jevJwm0409M87yW9NQbLu2lmirRWZMiSF+ic1VT+oQMuwNM8SoJI04TB03juplgmfQvP0KypP04ZtBSouT5fNtjQ/N0KhoFovHlXdsV9pzNsbsZ/D6VL5xr2q2HarEpLTl6Q2AAAAAABuVASDAACsIqbPp4Ef/UDDv3o2bHt8dY1yPvKgnBkZS1wZFkP3cJt+fPq/dCE0LIXJZmyWpd1Bj35j8+8oNbcq6vW0tDTp8OFfy+ebu89dfHyC9uy5WQUFRVGv40ZiWpbOXB7Uk4db1dAxMm/fLWszdffuYq0rTJUVCMnfMChf/aDMoel5j7NshrotU2f6xjUUjBwHutx2VW3KVc3WfKWmx1/X/VyPgH9ajWcOqeH485qaiPw1cHkSVLH1Zq3dcrPccQlLVh8AAAAAADcygkEAAFaJqUuX1POt/1Sgb+7MGsPtVtZvvV8pt97GLMFVaMw3pp+ffFgHJ1pkRvj+Vfmk+9f8hgrW3RL17/H09LSOHDmolpbGsO3l5eu0c+deuVzRW770RhMImnr1Qo+ePNym7sHJiP0cdkN7anJ1585i5WcmKOid0uQrHfI3DUnByEuASpLPaVP96LQaRiYVmGciYUZWgjZsL9C69dlyuuzXe0vXbHpiVA0nX9Ll079WwDcVsV98crqqtt+usg275WAmKgAAAAAAi4pgEACAFc4M+DX4kx9r6OmnJGvup/1xFZXK+ejH5MrKXobq8HYEzaCeP/+Ynuo9qimbwu4jmOsP6b6s7dqw5X0y7NF/69bR0apXXnlJU1NzgxuPJ067d9+k4uLSqNdxo5icDurFU516+li7RsYj7wMY53bo1q35OrC9SKluh/zNwxp7tVOhwcgBmiRZhjToMHS6d0x9vmDEfjabofLKTNVsy1deYcqS/gHB+PCA6o7/Si3nDisUmrtH5WtSMvNVteMdKq7YJpt96QJLAAAAAABiCcEgAAAr2FRTk3q/+XX5e7rntBlOpzLf816l3n5Ahi3MRnRYsSzL0smWX+snTb/UoBEKu49gYtDU3Z4i3bTno7LHJUe9Jr/fr2PHXtHly/Vh24uLy7R79355PHFRr+VGMDTm0zNH2/XCqU5N+yMv55mW5NYdtUW6eXOeXKN++c/0aqRl5KqzA4MumxqnAjo/MC7fPPsMxie4tH5rntZvzlNC0tLO8Bzqa9fFo8+po+GkrDB/1PCarII1qt55h3JLq5nxDAAAAABAlBEMAgCwApmBgLw/e1zeX/487CxBT/ka5T74e3Ll5i5DdXg7Wgca9MOz31OTNRF2H0GHaelmK1F3bf/fSsgoXZKaurs7dejQi5qYGJ/T5nK5tWvXPpWWriG0WYDO/nE9eaRNr57vVWiewK4gM0F37SrWzjUZMltG5HuqSf7h+fcOlKRRj11n+sfUPs/sQ0nKK0zRhu35KqvIlN2
+dH84YFmW+tovqe7os+pprZu3b8GaTara8Q5l5pctUXUAAAAAAIBgEACAFWa6rVU93/i6/J0dc9oMh0MZ9z6gtDvvYpbgKjM0NaifnHhYx3xzZ3++ZovPpnur7ld22a4lqSkQCOjEiSOqrz8ftr2goEh79tys+PiEJalntbIsSw3tw/rl4TadaRyct29Vcaru2lmk6gSP/Je8mvhRnTRPgChJpsuujlBIJ7tGNDlPX4fTpoqaHNVszVdmTuJ13cv1Mk1TnZdP6+LR5zTU2xaxn81mV0n1DlXteIeS03OWsEIAAAAAACARDAIAsGJYwaC8v3hCgz//mRSau/Sgu6RUuQ/+ntwFBctQHa7XdNCnZ878j54bOqdAhAl3xT5T9+fv07qN9y5Z4NvX16ODB1/Q2NjonDan06na2j1au7aSWYLzME1LJxr69cvDbWrunvt1fI1hSNsrsnT3lgLljQflPzOgibH5Z/xZkkZcNl3wTqhtzKf5osOUtDjVbMtX1cZcuT1L+/Y+FAyo5cJR1R17TuPD/RH7OZxurdm0TxXbblF8UtoSVggAAAAAAN6IYBAAgBXA19mhnm98Xb621rmNdrsyfvNepd/1LhkOfnWvFqZl6tXLz+hnrb/SqM0Ku2xoaiCk30xcp537PiybO35J6gqFgjp16pjOnz8Ttj03N197996ixMSkJalnNfIHQjp4rkdPHWlT39BUxH5Oh037N+TqrtIMxXdPKPDrdk3PPzlQPpuhSxPTujQ6remrzCQsWZOuDdsLVFSWtuQBrt83pcbTL6vh5IuanogcirrjElWx7Vat3bxfLs/SjHEAAAAAABAZny4CALCMrFBIQ0/9UoM//YmsYHBOu6uwSHkf+z25i4qXoTpcr/ru0/rRxR+qUz4pzARAt2nqHcrQgd0fkTslb8nqGhzs18svv6CRkaE5bXa7Xdu371JlZQ2zBCMYnwro+RMdevZ4h8YmAxH7JXgcetfmfO1NjJNaR2Qd7lLk3pIpqdMf0KUxn3r9c38OvJHb41D15lzVbM1Xcmrc9d3I2zA1PqKGky+q8fTLCvgj74mYkJKhqtrbVbp+lxxO1xJWCAAAAAAA5kMwCADAMvH3dKvnm1/XdFPT3EabTenv+g1l3HMvswRXkd6xLv341CM6Fwi/z5xhWdrld+o3N35AqYWblqwu0zR15swJnT17UpY1dxZaVlaO9u27VcnJKUtW02oyMDylp4+266UzXfIHzIj9slM8+q3KHFUEJbNtXJYm5j3vaCikyxN+NU/65Q/zfXnTufOStH5Lntauz5bTab+u+3g7xob6VHfsV2q5cFhmmKWOX5OaVaDqnXeocN1m2WxLXycAAAAAAJgfnzQCALDELNPU8LPPaOCxH8oKzJ1H5MrLV+6DvytPWfkyVIfrMTg5qGfOPaqDY00yI8y2q5g2dX/JARWtf6cMY2n2EZSkoSGvDh58Xl7v3LDSZrNpy5ZarV+/SbYl2ttwNWntGdOTR9p09GKfzHmCu405iXp3froyh32y2scVOTqUgpaltim/Gif9GghEDtikmTBwTVWWyiuzlJzquc67eHu8PW26ePRZdVw6Lc2z02F2UYWqd7xDOSVVzDgFAAAAAGAFIxgEAGAJ+fv61Put/9TUpYa5jYahtHfepYz77peNpfdWhe7RDj117oc6PtU5EwiGCUSy/SHdm7ZJm2/+gAyHe8lqM01TFy6c0alTx2Sac6Oq9PQM7dt3m9LS0pesptXAsixdaBnSk4dbdb5l7pKrr3FIurs4XXsTPHIP+6SeiXliM8kbCKpx0q/WKb8C83TMyX89DExKWZ4w0LIs9bbV6+KRZ9XXHuZn1SxDhes2qWrHAWXklixZfQAAAAAA4PoRDAIAsAQs09TIi8+r/wf/I8vvn9PuzMlR7kd/V3Fr1y1DdbhWLYOX9OSFx3TO3y8rQiCYEDR1pzNPN+//iJyJGUta3+joiA4efEH9/b1z2gzD0MaNW7Vp0zZ
mCb5ByDR1tK5PT77apra+8Yj98pwO3VuQqrWmIVvAlIZ9EfsGTEstV2YHDgUjzw7MLUhWeVWW1lRmKjF5ecJASTLNkDoundbFo89quK8jYj+b3a7S9btUVXu7ktKyl7BCAAAAAADwdhEMAgAQZYHBAfV++5uavHghbHvqO+5Q5gO/JZt76WaT4dpZlqW6ntN6uv5najDHZl4MEwjaLUs3BTy6e+sHlZhTufQ11p3XiROHFQqzD1xKSpr2779VGRlZS1rXSubzh/TSmS49faRdg6PTYfu4DKk2IU7vyEhUmt+UfJbmW1az3z8zO7Bt2q9QhG65hckzMwMrspSYvLz/7QcDfrVcOKy6Y7/SxEj4/TElyenyaM2mfarYdqviEtmPEgAAAACA1YhgEACAKLEsS6O/fkn9j35P5vTcwMGZmaWcj35M8ZVVy1AdFsq0TJ1pe1VPNT2lNmsqYj+naWlnwKUD6+5W1pp9S77P2vj4mA4delE9PV1h22tqNmvLlu2y23n7J0mjE349e7xDz5/o0MR0MGyfQoddNyfHa0ucSw5Tkj/y7oE+01TzpF+NU36NBsP3yytKuRIGZiohafn/EMA/PanLp19Ww8kX5Zsci9jPE5+kim23as3m/XK545awQgAAAAAAsNj4ZAgAgCgIDA2p9zvf0uS5M2HbU265TVnvfb9snuVbNhDzC5khHWn8lZ5pf0G9CkTs5wmZ2mcl6vaqe5VStGXJA0HLsnT5cr2OHXtFgcDcOpOSkrVv363Kzs5d0rpWqt6hST11pF0Hz3YrECbA8xiGtsW5tD8pXrmvLbUaOQ9Ujy+gxkm/OqYDc7oZxpUwsDJLZZWZSkhc/jBQkibHhtVw4gU1njmoYCDyUqiJqVmqqr1dpet3yu5wLmGFAAAAAAAgWggGAQBYRJZlaezVQ+r73n/LnJyc0+5IS1fORx5UQs2GZagOC+EPBXSw/ud6rvuwhozI+8IlBk3dYkvXzZt+S4k5FUtY4esmJyf0yisvqbOzPWx7ZeV6bdu2S04noU5T16h+ebhVJ+r7wy4CWup0aHe8W1vj3XJq/nB3KmSq+cregeOhN8eBhiHlF6eqvDJL5ZWZik9wLeJdvD2j3l7VHX1OrRePyjQjj+20nCJV7ziggrWb2YcSAAAAAIAbDMEgAACLJDgyot5Hvq2JUyfDtifvu0lZ7/9t2ePjl7gyLMRkYFIvXnhML/Sf0bjNUqRsKD0Q0m3ufO2rfb/c6YVLW+QVlmWppaVRhw8flN8/d8ZXfHyC9u27VXl5BctQ3cphWpbONg7qycNtqm8fntOeYBiqjXdrV5xbuc753xablqVu38zegV2+wJvCRcOQCkpmwsCyipUVBkrSQFez6o4+p87Gs5pvb8SckkpV7zig7KKKJZ/5CgAAAAAAlgbBIAAAi2Ds6BH1/vfDMsfH57TZU1KU8+GPKnHzlqUvDFc1Oj2i5879QC8PN2jaJinCBKkcf0jvSFyjnTvfL2dixpLW+EbT01M6fPhltbY2h21fu7ZStbV75HKtrHBqKQVDpg5f6NWTh9vUOTDxpjZD0lqXQ7vjPdrocclxlQBsImiqccqn5km/Js3XQzXDkApL066EgRmKi19ZX2/LstTTclEXjz6r/o7LEfsZhqHCdVtUteOA0nOKlrBCAAAAAACwHAgGAQB4G0JjY+r970c0fuxI2PakXbuV/dsflD0xcYkrw9UMjvfq6XM/0KsTrQoaRsRAsNgX0oH0Gm3Z817Z45KWtsi3aGtr0auv/lrT01Nz2uLi4rRnz80qLCxZhspWhilfUC+e6tIzx9o1NPbmmZRJNkM74zzaFe9WpsM+73lMy1LndECNU371+IKzc+xsNkMFJalaUzUzM9ATt/KWaDXNkNrrT+ri0Wc1MtAVsZ/N7lBZzS5Vbr9dSWlZS1ghAAAAAABYTgSDAABcp/GTJ9T78LcVGhud02ZPSlL2B/+3krbXLkNlmE/XcIueOvdDnfD1yjS
MmalfYazzmXpnzg5VbbhXNpdniat8M7/fpyNHDqmp6VLY9tLSNdq5c588nuWtc7kMjfn07LF2vXCqU1O+1/fOs0mqcju1K96j9W6n7FeZHTgWDKlx0q/mKb+mr8wOtNkMFZWmaU1VlkrXZazIMFCSggG/ms+9qvrjv9LEqDdiP6c7Tms371fF1lvkSUhewgoBAAAAAMBKQDAIAMA1Ck1MqO/7/62xVw6FbU/cXqvsD35YjiQ+dF9Jmvou6KmLj+tcaGjmhTAhkWFZ2uC36Z1FN6ms+i4Z9uV/q9TZ2a5XXnlJk5MTc9rcbrd27bpJpaXly1DZ8usamNCTR9r0yrkehd6wzGea3aZdcW7tjHcr1T7/7MCQZal9OqDGSb/6/EFJM2FgyZp0lVdlqWxdhtyelRkGSpJvakKXT72kS6dekm9q7hh5TVxCiiq236o1G/fK6Y5bwgoBAAAAAMBKsvyfdgEAsIpMnD2jnu98U6Hh4TlttoQEZf/Oh5S0Y5eMq8xMwtKwLEsXO4/pqUu/0GUrcmhisyxtCzh1R/kdKlh3iwwjwrqiSygQCOj48VfV0HAxbHthYYn27LlJcXHxS1zZ8rIsS5c6RvTk4Tadujww+7pdUo3Hpd1xblW4nbJd5b/B4UBIjZM+tUwF5Lcs2eyGStZmzMwMXJsht2dlv02eHBtS/fHn1XT2kIIBf8R+SWnZqqp9h0qqa2V3rNyAEwAAAAAALI2V/YkHAAArRGhqSv3/8z2NvvxS2PaEzVuU86GPyJGaurSFISzTMnWq+SU93fKc2uWL2M9pWtplxutAxT3KLKldMYFub2+3Dh58QePjY3PanE6ndu7cp/LydSum3qVgmpZOXhrQk4db1dj1+vK9WXabdsV7tDPOrUT7/IFu0LTUOu1X46Rfg4GQ7HZDRWvTtaYqSyVrVn4YKEkjA92qO/acWuuOyTLNiP3Sc0tUveOACtZuXBFBNwAAAAAAWBlW/qcfAAAss8mLF9TzrW8o6B2c02aLi1PWB35HyXv3xVRIs1KFzJAONzypZzoPqs8IRuznCZnab6Tq9poHlJK3fgkrnF8wGNTJk0d18eLZsO15eQXau/cWJSQkLnFlyycQDOnguR49daRdvd5JSZJT0qY4l/bEeVTuvvosuEF/UI2TfrVO+2XZbSouT9fWqiyVrEmXy7063g73dzap7uiz6mo6N2+/3NJqVe84oKzCtfxMAgAAAAAAc6yOT0IAAFgG5vS0+n/0qEae/1XY9viaDcr53w/KmZ6+xJXhrXxBnw5eeFzP9R3XsM2SIuQhSUFTtzhzdMu29yk+o2Rpi7yKgYE+vfzy8xodHZnT5nA4tH37blVUVMdM2DMxHdDzJzr17PEOjU7MLJWZ57BrT7xbtXFueWzzz4Lzm5ZapmZmB47LUvGadN1WlaXi8tUTBgb80+q8fEaNZw5poKspYj/DMFRUuU1VOw4oLatgCSsEAAAAAACrzer4VAQAgCU22VCv3m/9pwL9/XPaDLdHWe/7gFJuviVmQpqVasI3rhfP/VAvDF3QhE1ShKwoPRDS7fGl2lv7frmTs5e0xqsJhUI6c+aEzp07Jcuy5rRnZ+dq375blZSUvAzVLb3BkWk9fbRdL53uki8QksuQ9sa5tTfeo3zX1d+69vmDapz0qTsYUuGaDO2ozFTJmgw5XfYlqP7tC4WC6mm5qNa64+pqPKtQMBCxr93uVNmG3aqsvV2JKRlLWCUAAAAAAFitCAYBAHgD0+/XwGM/0vCzT0thQpq4yirlfvRjcmZmLUN1eM3IpFfPnn1UB8ca5bMZEQPBXL+pAylV2rHnfXLErbxgzesd1MGDz2toyDunzWaza9u2Haqq2iDbVWbH3Qjaesf05JE2HbnQJ9OyVOZ06OaUBFV73HLZ5g/gfaap5km/WgJBZZSlqaKyWAfK01dNGGhZpvo7m9R68Zg6Gk7J75uct7/LHa+1W27Suq03yxOftERVAgAAAACAGwHBIAA
AV0w1Narnm19XoKdnTpvhcinzPe9V6m3vkBEDIc1K1T/apafPPqoj050KGoYUITAq8Vu6I3OLNm96j2wuzxJXeXWmaer8+dM6ffq4TNOc056RkaV9+25VamraMlS3dCzL0ulL/Xr02Qadb/YqxTB0Z5xb2+LdynBe/W1qjy+gFn9QjsIkle8t1PbydDmdqyUMtDQ80KW2i8fUVn9Ck2NDVz0mLjFVldtvU/nGvXK63EtQJQAAAAAAuNEQDAIAYp4ZCGjwpz/R0JO/CDtL0LN2nXI/+rty5eQsQ3WQpM7BRj11/oc6GRiQaRhShCVcK/3SO/P3qmL9b8jmcC5xlQszMjKsgwef18BAmGVqDUObN2/Xhg1bbuhZgiHT1MunOvXYS41q7xjRRpdTn0hJVFmcS46rLM87GTLV6gsokJuoopoCHShLk2OVhIGSNDEyqNa642qtO67Rwe6r9jdsNuWWVKukulaF6zbLbuftOwAAAAAAuH58sgAAiGnTLS3q+ebX5e/qnNNmOBzKuP89SrvjTmYJLpOm7tN6su5nOm+NzrwQJjQyLEsbAw69s/R2lVa+Q4axMr9XlmXp4sWzOnnyqEKh0Jz21NR07d9/q9LTM5ehuqUxPhXQS6c69fKxDsVNBbUjzq0PZaUq2TF/sGdalnoDIU2ke5SxMVs7yjPkcKzM73M405Njam84pda6Yxrsal7QMZn55Sqp3q6iiq1yxyVGuUIAAAAAABArCAYBADHJCgY1+POfyfvzn0lhlnJ0l5Yp98Hfkzs/fxmqi22WZelC2yt6qvFJNWo6Yj+7ZWl7KE53rLtbeaW7ZVxlptlyGhsb1aFDL6q3d+4MMcMwtGHDZm3atF12++qZ+XYt2nrG9OxLTfK2Dava6dQHPR7lJDlkv8r3bCJkajjRqcSabFVUZsq+isLAgN+nzsYzaqs7rp6WOlnW3J8zb5WckaeS6u0qqdyuhJSMJagSAAAAAADEGoJBAEDM8bW3q+ebX5evvW1uo92ujHffp/S73iXjBg1pVirTMnXy8rN6uvVFddgCEfs5TUt7lKx3VN+rzIJNS1jhtbMsS5cuXdSxY68qGAzOaU9OTtG+fbcpKyt7GaqLrpHhKb16pF299QMqlKGbPS5lZKZcNcA1LUsjbrtc69KVszlXBatomVAzFFJPa51aLx5TZ+NZhYL+qx4Tn5Sm4qrtKqnartSsgiWoEgAAAAAAxDKCQQBATJlqvKyOv/+yrDAhjbu4RLkP/q7chUXLUFnsCoQCOnzxCT3bc1j9NlOKMCksLmRqvyNLt235LaVkrlnaIq/DxMS4Dh16Sd3dHWHbq6s3aOvWnXI4boy3Y6GQqZ6OUTXV92vg8qByLEPVHqd2JScs6PhpuyEVJytje77SE1xRrnbxWJapga5mtdYdV3v9SfmnJ656jMsTr6KKrSqpqlVmQdmKXf4WAAAAAADceG6MT6IAAFgg7y9/PjcUtNuV/q57lPEbvynjBglpVgNfYFq/PvcjPT94WsM2RQwEk4KmbvUU6ubt71N8yspf2tWyLDU1XdKRI4cUCMydMZaYmKS9e29Rbu7Kv5ermRj3qa3Rq/bGQfk6x5Rnt2utx6mNCXELOt40JFt+kuLWZyklL3FFLwf7VsP9XWqrO6bW+hOaHPVetb/d4VTBmo0qqa5VTkmV7HZ+1gAAAAAAgKXHJxIAgJhiT0x603NXfoFyP/Z78pSULk9BMWhiakTPn/2BXhpt0MQ8gWBGwNTtSWu1d8sH5IpPXcoSr9vU1KReffVltbe3hG1ft65KtbW75XSunhlxb2Salvq6R9XW6FVno1fuEZ8KPE5tdTvlTI5f0DlChmTkJChna76S1mZIDpu83qvPslsJJka9aqs7rta64xoZ6Lpqf8OwKaekUiVVtSpYu1FOl2cJqgQAAAAAAIiMYBAAEFMy3/NbMqen5O/uVlLtDqXd9S7ZnM7lLismjIz36dkz/6ODk23y2YyIgWBewNId6Rt
Vu/m3ZHctLGxabpZlqbW1WYcPvyyfb3pOe1xcvPbuvUUFBatvmdrpqYDamrxqa/Sqv3lIWYahAo9Tt7gcsqUtbJlQn01yl6QoYU26HLmJMuw2JaUnyG63KRQyo3wHb49vakLtDSfVWndMA51NCzomI69UJdW1KqrYKk980tUPAAAAAAAAWCIEgwCAmOJISlb+7//hcpcRU/qGWvXMuR/oiL9XQcOQbOGXiywNSO/M2aGNG+6TzbF6wtrR0REdOXJQXV3h9xIsL1+rHTv2ye12L3Fl18eyLA30jqut0avWRq+m+8ZV4HGqzO1UberCgkBJGpYle1GycjfkKCUrflUtExoM+NTVeE6tdcfU3XJRlnn18DI5PUcl1bUqrtyuxNTMJagSAAAAAADg2hEMAgCAqOjovaAnLz6m06FhmYYhRQiGqgI2vbPoFlVU3SnDFmEa4QoUDAZ17twpnTt3SmaY4Mjj8Wj37ptUXFy2DNVdG990UB0tQ2pr9Kqtyat4f0iFHqd2eJxKykpe0Dksy1J7MKTJjDiV1+artCg1ukUvMjMUUm9bvVrrjqnz8hkFw+wP+VZxiakqqdqu4qrtSs0qWFXhJwAAAAAAiE0EgwAAYFFdbj+mpy49oQuanHkhTFhiWJY2hdx6Z/mdKlmzf9UFKh0drTpy5JDGx8fCthcXl2r37pvk8cQtcWULY1mWhgYm1drkVVvjoPo7RpXltKvQ49SdCR55khcW0AYtSw2+gFrtlvJqcrRrW4Hi3Kvn7aVlWRrsblbrxeNqbzgp39T4VY9xuuNUVLFVJVXblVW4RoaxesJsAAAAAACA1fPJDQAAWLEsy9L5phf0VPNzarJFnmlltyzVWol6Z+W7lVu0dQkrXBxjY6M6evQVdXS0hm2Pi4vXjh17VFJSvuLCzoA/pM62YbU2Dqqt0Sv/mF/5HofKPU7ty0qWI8ISr281ZZq64Avo7LRftpwE3bK/TO9fkyHbCrvf+YwMdqv14nG11R3TxKj3qv3tdqfy12xQSdV25ZZWy76KlroFAAAAAAB4I4JBAABw3UJmSCfqfqlnOg+q0x6SIkyecpmW9tjS9Y4N71FGTsXSFrkIQqGQzp8/rbNnTyoUCs1pNwxD1dUbtXnzNjmdrmWoMLyRoanZILCrbVgeGSpwO7TT41RWjmfBYd5wKKSz036dm/ar3TS1e0Ou3ltbpILMhe85uNwmx4bUVndCrXXHNNzfedX+hmEop7hSxdW1KlyzUU73ypz9CQAAAAAAcC0IBgEAwDULBP169fxjerbvhAbslmQP3y8+ZGq/O0+3b3y/ktIKl7bIRdLV1aHDhw9qbGwkbHt2dq527dqvtLT0Ja5srlDQVFf7sNoavWpt8mrEO6VUx8wSoTVpCUpzLvytX1cgqHPTfp2d9qszGFJ6slvv2FuiT23OV2Lc6pgx55uaUMelU2qtO67+jkZJ1lWPSc8tUUlVrYort8qTsLD9FQEAAAAAAFYLgkEAALBg0/4J/frMo3p+6KJG7IoYCCYHTd2WUKabtnxAcQkZS1rjYpmYGNexY6+otbU5bLvHE6ft23epvHzdsi4bOjYyrbYmr9oavepoHVIoYCrL5dAaj1MFWclKdCxsDzzTstTsD+qsb2ZmoDdkSpLWFqboE7VF2laRKbtt5e+nFwz41dV0Tq11x9TTfFGmOXeG51slpWWrpLpWxZXblZSWtQRVAgAAAAAALA+CQQAAcFVjY4N64pVv6cXxJk3ajYiBYEbQ0oHUau3e9F65PElLW+QiMU1TFy6c1ZkzxxUMBue0G4ahior12rq1Vi6Xe8nrC4VM9XaOqq3Jq9ZGr7z9E7IbUp7bqdp4j/I9DrkXGOAFLEt1voDOTft1wefXhDkzo85uM7SnJld37ChUae7KnzVnmiH1tjWore64Oi6dVjDgu+oxcQkpKqrappKqWqVlF664PSEBAAAAAACigWAQAABENOBt109f/S+9ON4uv82Q7OHDk/yAdCB
7q2o3PiC7Y+nDssXS09Olw4cPamRkKGx7Zma2du3ar4yMzCWta3LCr7ZGr9qavGpv9srvC8ltM1TgnlkiNNftkGOBwda4aerClf0CG/wB+d+wumZyvFO3bi3QbVsLlJK4sr+PlmXJ29Oq1ovH1NZwUr7Jsase43THqXDdZpVU1SqrcK1sq2AGJAAAAAAAwGIiGAQAAG8SDPp1tuFpHeo+qjpNyjQMyRY+dCoL2vTO/H3aUP0u2ewRphGuAlNTkzp27FU1N18O2+52u7Vt2y6tXVu5JDPLTNNSf8+YWhtnlgjt75kJvRLtNpV7nCpIjFOW077gWgaDIZ2b9uucz69mf1DmW9pLcpJ0oLZQO6tz5Fzg0qPLZdTbOxMG1h3X+MjAVfvb7A7ll9eopKpWeWXrZXesjv0RAQAAAAAAooFgEAAASJK6ei7o0KUndXS6W+N2QzKkK/9njuqQU+8sfYfWrb1tVS/BaJqm6uvP69SpYwoEAmH7rFtXpa1bd8rj8US1lumpgNqbh9TWOKi2piFNT83Uk+60a2OiR4Uep1KdCw9fO4MhnZ3y6ey0X93BufvsGYa0vSJLB2qLtK4wZUV/HyfHhtVWf0Jtdcc01Ndx1f6GYSi7qELFVdtVuG6zXO64JagSAAAAAABg5SMYBAAghk1NDuvoxZ/pVe8FtdqvhEcRlgs1LEtbrHi9s+I3VFy8cwmrjI6+vh4dPnxQQ0ODYdvT0zO1a9d+ZWVlR+X6lmVpsG9CrY2DamvyqrdzVJYl2SRluxwqSI5TocepePvCZvBZhtRrk14ZmtC5ab+GQm+dFzgj3u3QzVvydfu2AmWmrNzAzD89qY5Lp9Vad0x97ZclWVc9Jj2nWMXV21VcsU1xiSnRLxIAAAAAAGCVIRgEACDGmKapS82/1iutv9Zpc+TK3oGR+7tMSzucabq98j7l5q5fukKjZHp6SsePH1ZjY0PYdqfTpa1bd6iionrR96Dz+4LqaBlWW9Og2hq9mhj3S5IchlTodqrQ41S+2ylXhKVb53DYNJLo1CveCb3cN6pJK3J4lpcRrwO1Rdpbkyu3a2Uu+xoKBtTVdE6tdcfV3XxeZmjuTMe3SkzNUkl1rUqqtispLTohLgAAAAAAwI2CYBAAgBgx7G3TobondHi8RQOvvQOYJ4AqCdp0S+5G3brztxQXny6vd2JpCo0S0zR16VKdTp48Kr/fF7bPmjUV2rZtl+LiFmcmnWVZGh6cnNkrsMmr7vYRmeZMeOexGVoT71Kh26kct0P2BS7laXgcsnITdGbar5829Mnb7p+3/6Y1GTpQW6ia0vQVuVyoaZrqaa1Xa90xdV46rYB/+qrHeBKSVVy5TSVVtUrLKVqR9wUAAAAAALASEQwCAHADC/qndKb+Kb3Sc0x1Np9Mw5j3t39iyFKtO0d7175TBfmblJ6eILvdplCEZSlXi4GBfh0+/LIGB/vDtqempmvXrn3Kycl729cKBELqahtWW6NXrY1ejY28HnQl2W0qTHCp0ONUpmvhb8NsSS45i1M0kODQk5f6deRws0Jm5NmBbqdd+zfm6R21hcpNj39b9xMNlmWpr6NZl88c1qUzRzQ5PnLVYxwutwrXbVFJVa2yi9Yt+mxOAAAAAACAWEAwCADADcayLHV3ntLBy8/oWKBP43bblaVCw8+qslmWqiyP9uTt0KaKO+Vwupe03mjy+aZ18uRRNTRcDNvucDi1Zct2VVVteFtB0+S4X82XBtRyaVCdbcMKBV8PUjOcdhV6ZpYJTXYsfAlPe2a8nMXJshUk6XTvmJ453qHLHfMHaJkpHh3YXqj9m/IV71lZb/OCAb/62hvU1XReXU3nNTU+fNVjbHa78spqVFJVq7yy9XI4XdEvFAAAAAAA4Aa2sj4xAgAA121yYkDHzv9Mrw7VqdV5ZTaZPXLYlRmUdieXa3fVPUpLLVyiKpeGZVlqbGzQ8eOH5fOFX5qytHSNamt3Kz4+4bquMTo8reaGATU19Ku
nY3T2dZukPLdDhR6nCtxOxc3zPXgTmyFHbqKcRclyFqVo0ib96nSXfvX9y/KOhl/69DVVxam6o7ZIm9dmyrbQ/QmXwMSoV11N59XdfF59bZcUCgUWcJSh7KJ1KqnarsJ1m+XyrLwZjwAAAAAAAKsVwSAAAKuYGQrq8uXndaj9kE5rXH6bITkj93eZljbb07S35GatLd17Qy7H6PUO6vDhl9Xf3xu2PSUlVTt37lNeXsE1n3tocFJN9f1qqh/QQO/47OtOQ8p3z8wKzHM75VxoOOe0yVmQLGdxspwFyTJcdnUOTOi5Xzfq0Lke+YORl3B12G3aXZOjO2qLVJSdeM33Eg2maWqwu0XdTefU1XReI4PdCz42LbtQJdW1KqrYpvik1OgVCQAAAAAAEMMIBgEAWIWG+y/rlbqf6/Bkh/qdxsw0tQhLhUpSScihPZkbVFt9j+I8yUtW51Ly+/06ffqY6urOy7Lm7r/ncDi0ceM2rV+/UXb7wpb0tCxLA73jMzMD6wc0NDg52+a2GSpwO1XkcSrH7ZDdWFgYaMQ55CxKkbM4WY7cRBl2m0zL0pnGQT17rF3nW4bmPT410aXbthXqli35So5f/qU1fVMT6mmtU3fTeXW3XJB/evLqB12Rkp6ttZt2Kbt0k5LTc6JYJQAAAAAAACSCQQAAVo3g9LhOX/y5Xu07rTpHQKZhzExViyAxZGlHXIH2rrtL+TlVS1jp0rIsS83Nl3X8+KuampoK26e4uFS1tXuVmHj1mXWWZam3c1RNV8LAsZHXlyKNtxlX9gt0Kctll22BYaAtxS1ncYqcRSmyZ8bJuHLclC+ogyc79dzxDvUOha/9NeX5yTpQW6jaymw5Fro8aRRYlqXRwR51NZ9Xd9N5DXQ1y7Iiz2x8I8OwKTO/TPnlG1S1tVaZuQUyTUte70SUqwYAAAAAAIBEMAgAwIpmWZa6Wg7rUPPzOhYa1LjddmWp0PCBlM2yVGUkaG/+Lm1cd0AO+zzrit4AhoeHdPjwy+rtDb9kZVJSsnbu3KuCguJ5zxMKmepuH1FTw4CaGwY0Oe5//Rx2m4o8M8uEZrgW/tbJnhX/ehiY4n5TW9/wlJ471qGXz3ZpyheKfA6bodqqbB2oLdSa/JQFX3uxhYIB9bVfmt0vcGLUu+BjXZ545ZWtV35ZjXJLq2f3DExPT7gSkM6d3QkAAAAAAIDoIBgEAGAFmhzp0tELT+jwyGW1vrZa5DyzxDJDhnanrNOe6nuUmpS7NEUuo0AgoDNnjuvChbNhlw212ezauHGLNmzYLLs9/NudYNBUR8uQmur71XJpUL7p4GxbmsOuQs/MMqEpzoUtOyqbIUde4pUwMFm2uDeHspZlqa51SM8c69DpywPzxmGJcU7dujVft20tVFqSe56e0TM5Nqzu5vPqaj6v3tYGhYL+qx90RUpmvvLLa5RfXqP03NIbci9LAAAAAACA1YhgEACAFcIM+HSp4Vm90nlYp21T8tsMaZ4t5JympS3ODO0tu13rinbMLk95I7MsS21tzTp69BVNToZffrKgoFg7d+5VUtLcvRQD/pDamrxqqu9Xa6NXAf/rs/UynfbZmYGJjgWGgQ6bnAVJM2FgYbIM19zj/IGQXr3Qq2ePtaujf/4lMwuzEnRHbZF2rc+Ra6GB5CKxLFPenjZ1NZ1XV9M5Dfd3LvhYu92p7OIK5ZevV15ZjRKS06NYKQAAAAAAAK4XwSAAAMvIsiwNd1/UK5d+qSPT3ep32q78do4c8pWYLu3J3qzaqncpzpWwZLUut9HRER0+fFDd3R1h2xMSErVjx14VFZW8KST1TQfUcmlQTfUDam/2KhSamatnSMp1Oa7sGehU3AL37TNcdjmKkuUqTpEjP0mGI/xxQ2M+/epEh1481aXxqUDk80nasi5TB2qLVFWcuqQBr983pd7WuitLhF6Qb2p8wcfGJ6XNLBFavkHZRevkcM6TYgMAAAAAAGBFIBgEAGAZBCaGdObiz/XqwDn
VOUMyDUNyRg6mEk1pR3yR9lberfyMtUtY6fILBoM6e/akzp8/LdM057TbbDbV1GzSxo3b5HDMvLWZHPer+dKAmuoH1NU2LNOcCQPtkgrcM0uEFngcci1wiUsjzjEzK7A4RY7cRBm2yOFdY+eInjnWruP1/QqZkRcMjXPbddOmfN2+vVDZqXELquPtsixLY0N9M0uENp1Xf2ejrDBf03AMw1B6XunMEqFlNUrJzI+JWaoAAAAAAAA3EoJBAACWiGWG1NX0sg61vKRj5ojGHbYrS4WGD1dslqUqW5L2Fu3VxrJb5YiwV96NrL29VUePHtL4+FjY9ry8Au3cuU8pKakaG5lWU32Pmhr61dMxOtvHaWh2idB8t1OOeUK9N7IlumbCwJIU2bPi5w3Bxib9OnlpQC+e6lJz92jEfpKUkxanA7VF2rshV3Hu6H9PQ6Gg+jsa1dV0Tt3NFzQ+3L/gY53uOOWVViuvrEZ5ZdVyxyVGsVIAAAAAAABEW+x9wggAwBKbGGzRsYs/1+HxZrW6bJJN0jwz1bJMm3alVWl31W8oLSFr6QpdQcbGRnX06CF1dLSFbY+Li9eOHXuUnJiry+cH1NzQqP6e15fBdNuM2ZmBOW6H7Auc2WZL9bweBqZ55g0Dh8Z8OtHQr+P1fapvH5YVeXKgJKmmLF131BZqQ3mGbFGeaTc1Maru5gvqbjqvnrY6Bf2+BR+bnJ6j/PINyitbr8z8ctnsS7vXIQAAAAAAAKKHYBAAgCgwfZNqqHtKr/Qc0xm7T36bTXJFDgOdpqUt7mztKz+gtflbYnaJxlAopPPnT+vs2ZMKhUJz2g3DUGlxpRxWnl591quhgdf3G4y3GSr0OFXkcSnTZV9w+GbPjJ9dJtSe4p6378DwlI439Ot4fb8aO0d0lSxQLqdNezfk6R3bC1WQGb39IC3L1FBfx8xegU3n5e0NH6iGY7PblV24TnlXlghNTM2MWp0AAAAAAABYXgSDAAAsEssyNdR+Uq9eflZHAn3qd9olpzQzRTC8Erm1J2e7aivuVJxzafaZW6k6O9t15MhBjY2FX4rT40qVbzhb516xJHVJkpLsttllQjNcC3xbY0iOnITZMNCW4Jq3e493Usfr+3Ssvl+tPeGXNH2r9GS33rGtUDdtzldinHNhdV2jgH9ava316mo+r+7mC5qemH8J0zfyJCQrv6xG+eU1yi6ulNM1fyAKAAAAAACAGwPBIAAAb1NgtE9nLj6hVwcvqs4tmYYhOSMvv5hoGtqRWKq9lXcrP6106QpdoSYmxnX06Ctqa2sO226ZDvlHMjUxlSzJUJrDfmVmoFMp83yd38RmyJGXKGdJipxFKbJ5Ir8FsixLnf0TOlbfp+MN/ersn1jQJVxOmzaWZ2hXdY62VmTKPs9ysddrfLhfXU0X1NV8Tv0dl2WGmVUZnqH03GLll9cor6xGadmFMTsrFQAAAAAAIJYRDAIAcB2soF+dl17UobaXddyY0LjDJnkiBy02y1KVI1V7i27SptJ9stvYty0UCunixbM6c+aEgsHgnHbLkoITqQqMZSrD4VJRklOFHpcSHQsM3Bw2OQuSZmYGFibLcEX+mluWpZaeMR2vn9kzsHdoakGXiHPbtXlNprZXZmlDeYbcCw0qF8gMhTTQ1aSupvPqaj6vMW/vgo91uNzKLameCQNLq+VJSF7U2gAAAAAAALD6EAwCALBAlmVpsrdBx+qf1OHJNrW6r75UaKZl1+6MDdpdebfS4tKXrNaVrqenS4cPv6yRkeGw7abfo8SJfJU4k1WY5VScfWFhoOGyy1GULFdxihz5STLmCRFNy9LljhEdr+/XiYY+DY76FnSNBI9DWyuytL0iS+tL0+VcaFC5QL6pcXU3X1BX03n1tNYp4FtYSClJialZyi+vUX75BmUWlMtu560eAAAAAAAAXsenRQCwTM6fP69Dhw7p7NmzOnfunDo7OyVJzz33nAoLCyMe92d/9md67LHHIra///3v1xe
+8IVFrzeWhSZHdOnik3q176ROO4Py22ySO/LMMKclbfXkau+ad2ptTg1LNr7B5OSkjhw5pLa2prDthmlXfqBAGxzZcqcubPadEeeY3S/QkZsowxb56x0yTdW3DV8JA/s1MuFf0DVSElzaVpGl7ZVZqixOXdRlQi3L0vBAl7qbzqmr6bwGu1slWQs61rDZlF24VnlX9gtMSstetLoAAAAAAABw4yEYBIBl8i//8i967rnnrvv4/fv3Kysra87rW7dufTtl4QrLDGmo+bBebXpeR0Je9bvskluab3ZgiRGnPXk7VbvmHYpzepas1tVgfHRaRw4fU0d3vaQw++JZUpGyVGMUyeVyXvV8tkTXzH6BxSmyZ8XPG74GgqYutnp1rL5fpy4NaHwqsKCaM5Ld2laRre2VWVpbkCLbPIHjtQoG/Optq5+dGTg1PrzgY93xScorW6/8shrlllTK6Y5btLoAAAAAAABwYyMYBIBlsmXLFlVUVGjDhg3auHGjHnjgAQ0MDCz4+I9//OPatWtXFCuMTf6hdp298Au9OtygOo9Npt2Q7JFnriVahnYkr9HeiruVn1K0hJWufGMj02pqGNCluiaN+Ztld4ZfqjPFitcmlSlNifOez5bqkbMkRa7iFNnSPPOGgb5ASOeavDre0KfTlwc05QsTRoaRnRan7ZVZqq3MVmlu0qLO9pwY9aqr6by6m86rr/2SQqGFBZSSlJZdqPzyDcorq1F6bpEMY3GXLwUAAAAAAEBsIBgEgGXy8Y9/fLlLwBWWf0od9c/plc5Xddw2pXGHXYqLHAYalqVqZ7r2lt6iTYW7ZLctbMnLWDA0OKnmhgE11ferv29YruR+OeNHZQ8zCdBp2VWlIpUqW4bCB3D2zPjZZULtKe55rz3lC+pM46CO1/fpTNOg/AFzQTUXZCZoe2WWtldmqzArYdHCQNMMabC7ZTYMHBnsXvCxDqdLOcWVyiuvUX5ZjeISUxalJgAAAAAAAMQ2gkEAQEyyLFMTHed0/NLTOjzdpVaPQ3JJUuSQL1NO7c7cqN0VdynNk7pUpa5olmVpsG9CTfX9amoY0NDApCRLjvgRJWT3S7bw4VyRlan1KpZbb0kMDcmRk3glDEyWLcE17/UnpgM6dWlAx+v7da7Zq2BoYWFgSW6SaiuztK0iS3kZCQs6ZiF8UxPqaa1TV9M59bRclH96csHHJqRkKL+sRnnlNcouXCu74+pLqgIAAAAAAADXgmAQQMwYHx/XwYMHdfjwYV24cEEtLS0aGxuT2+1Wdna2Nm3apHvuuUc33XTToi4fGC3PPPOMnnnmGfn9fuXl5Wnfvn3atGnTcpe14lmWpeZTP9aLXa/qjNuS32aTPJF/HTotaWt8gfauuUNrs6pXxdiINsuy1Ns1qqb6ATXVD2hsZHq2LcXjlz2lRz77VNhjk6w4bVKZMpT0+os2Q478K2FgUYps83w/JGl0wq8Tl/p1vL5fda1DCpnWgupeW5AyMzOwIkuZqYuzL59phuTtaVVPa716Wuvk7W6VZS0snDQMmzILypVfVqP88holpecwvgAAAAAAABBVBIMAYsK3vvUtfe1rX5PPN3ePs2AwqObmZjU3N+vxxx9XbW2t/v7v/175+fnLUOnCPfLII296/tBDD+mWW27RV77yFaWmpi5PUavAsSPf0cPj52XGGVKE5SslqcSWoD0Fu1VbdoviHJ6lK3CFMkOmOlqG1NQwoOaGAU2O+yXNfAUznXblewxNxfepy9avYJgvq8OyqVKFKlOubDIkh03OgiQ5S1LkLEiW4Zp/OVbv6LSON/TrRH2/GjqGZS0gCzQMqbIoVdsrs7WtIktpSfMvRbpQ48P96mmtU09LvfraGxTwT1/9oCtcngTllVUrv3yDckuq5PLEL0pNAAAAAAAAwEIQDAKICc3NzbOhYE5Ojvbu3auamhplZGTI5/Pp1KlT+ulPf6rJyUkdO3ZMH/rQh/Too48qIyNjmSufq6qqSp/73Oe
0e/du5eXlyev16siRI/rHf/xHvfjii/r93/99ffe735XNZlvuUlekV0cvy7SHDwQTLJt2plZob8Wdyk8qWOLKVh6/L6j68726fKFP9ed7NDUZkCTZJOW6HCryOFXgcajfPqgLapffCIY9T4GVoRoVK84VJ2dRspwlKXLkJclwzD9G+4andLy+T8fr+9XUNbqgmu02Q9WlaaqtzNaWdZlKjp9/KdKF8E9Pqq/90kwY2FqniZHBazo+JTNf+eUblF++Xum5pfy3CQAAAAAAgGVDMAggJhiGof379+vBBx/Unj175nwwf//99+vjH/+4Pvaxj6m5uVkdHR36h3/4B33pS1+ac67PfOYzOnPmzDVd/4477tCf/MmfvK17eM1HPvKRNz0vKCjQ/fffr7179+rd7363Tp48qaeeekp33333olzvRpOXXKC6iebZ54YlVbsytLfsVm3Kr5XdNv/MtRuZZVka6B1Xe/OQ2pu86ukclXllmU67pEK3U4Uepwo8TrlshkY0oWO6rCFjPOz5Ei2PNrnWKL+sZCYMzEmUYZt/qcyugYnZMLCtL/x538rpsGlDWbq2V2Zpy9pMxXve3t58ZiikwZ5W9V4JAr09rbIWMkXxCrvdqeziCuWX1yivbL0SktPfVj0AAAAAAADAYiEYBBAT/viP//iqy2sWFBTooYce0r333itJ+uUvf6m/+qu/Ulzcm/ci6+7uVnNzc7hTRNTf339N/a9HTk6OHnjgAX3zm9/USy+9RDAYwT3bH5Rx9nvqnxxQSWqpdq+5Q2melOUua9lMTvjV0TITBLY3D83OCnxNocepUo9TeR6nHFf2vwsoqLPqULN6w67GapdN67MrtH7LFrlykubdN8+yLLX3jetYfb+O1/epe3ByQXW7XXZtXpOhbRVZ2rQmQx7X9b+lsSxrdnnQ3tZ69bY3KOifu+zwfFIy85VTUqnckiplFayRw/n2ZyoCAAAAAAAAi41gEEBMWOiee1VVVSorK1Nzc7OmpqbU2tqqqqqqN/V5695+K0lpaakkqa+vb3kLWcE8Drfes/Ujy13GsgmFTPV2jqq9eUhtTV4N9M6dlWdIKolzan2CRynO12dQWrLUqUGdV5t8RmDOcZJUlFusHXv3KTExKWINpmWpuXtUx6+Egf3DC9ujL97t0JZ1mdpemaWa0nS5nNc/u9M3NaG+9gb1tNart7VOE6PeazreE5+knJJK5ZRUKbe4UnGJsRsuAwAAAAAAYPUgGASAt0hMTJx9/Nq+hKvFyMiIJM2Z5YjYNjo8Nbs8aEfrsAL+UNh+hqSyOJfWJ7qV5Hhz6DamSZ1RiwaNsbDHJiUla+fOfSooKArbbpqWLnUM61h9v0409GtobGH/bSXFO7V1XZZqK7NUVZImh/369uczQyENdrfM7hM41Nt2TcuD2uwOZRWuUW5JlXJLqpSSmT/vTEgAAAAAAABgJSIYBIA38Pv9amlpmX2en5+/fMVcI8uy9PTTT0uSNmzYsMzVYDkF/CF1tQ+rvWlIbc1ejXin5u1vk1Qe71J1gkeJjjcHb0GFVK9ONalHljE3SLPb7dqwYYs2bNgsu/3NbyuCIVN1bUM6Xt+vkw39Gp0MP8vwrVITXdpeka3tlVlaV5Qiu+3aw8DZ5UFbZoLAvvZLCgaufXnQ3JIq5ZZWKTO/nOVBAQAAAAAAsOoRDALAGzzxxBMaG5uZEVVTU6OsrKxlrujNLly4oMbGRt15551yuV4PKcbHx/WlL31JZ8+eVXx8vN7znvcsY5VYapZlyds/MTMrsNmrrvYRmaGrz4azS1oT79L6RI/i3jITz5SldvWrXh2ajrBsaEFBsXbu3KukpOTZ1wLBkM43D+l4fZ9OXR7QxHRwQfeQmeLR9sosba/MVnl+smzXMRvPNzWh3rZ69bbWq6e1TpNjQ9d0vCchWTnFM/sE5pRUKi4h+eoHAQAAAAAAAKuIYV3LOloAcAPzer265557NDg4KEn653/+Z91xxx1
Ru94LL7ygf/3Xf519fuHCBQUCAVVXV8+Gfrfccov+8A//cLbPs88+qz/8wz9USkqKNmzYoLS0NA0MDOjixYsaGRlRfHy8HnroId1yyy1Rq3s+oZC5LNeNRVOTfjVfGlRjfb+aGvo1Prrw2XAOQ9qam6wyu132twSIliz1aEgX1a5xI/zef0lJybr11ltVXl4uwzA07Q/qRF2fXjnbrWN1fZryLSwMLMhK0J6NedqzIU/lBSnXvDRnKBhUb3uj2i+fV8flC+rrapGu4W2N3eFUflmlCtesV9HaGqXnFLA86BKy2QwZhiHLsmSavB3F4mBcIRoYV4gWxhaigXF1Y7Nf59YKAAC8ETMGAUAzS4h+6lOfmg0FDxw4ENVQUJoJIk+fPj3n9YsXL84+Li8vf1NbZWWlPvShD+ns2bNqaGjQ8PCwnE6nCgoKdN999+nDH/6wCgsLo1r3fPhHSvSYpqXOtmE11vepsb5fXW3D15KBKT7BpbVrM7QuwS1317gsX1B6Syg4qDFdVJu8xnjYc9hsNu3YsUO7du2SPyi9fLpLh85263hdn/yB8PsWvlVZfrL2bsrX3o15Ks69thl5lmVpuL9HbZfOqe3SeXU11yngv7blQTPzilW8rkZF62qUV1Ihh9N5Tcdj8RmGIbudQBaLi3GFaGBcIVoYW4gGxhUAAIiEGYMAYp5pmvrMZz6jn/3sZ5Kk4uJi/fCHP1RKSsoyV7b6MGNwcY2OTKupoV9N9f1qvjSo6amF7dEnSYbNUGFxqsors1RWmqa4nnGNnu6R6Zsb4I1pUhfVoR4j8tKbJSUlqt25T/VdPr1ytlunLw0ouMDv97qi1NmZgXmZCQu+B0mamhhTZ9NFtV86r47GCxof8V7T8fFJKSpaW6PCNTUqXFut+ET+u14p+Gt2RAPjCtHAuEK0MLYQDYyrGxt/jAsAWAwEgwBimmVZ+qu/+is9+uijkqT8/Hw98sgjyzrrbjUbGppUMLiwmWOYKxg01d0+rPbmIbU1eTU0MHlNxyclu1VUnq6isjQVlKTJaVnyXeiXr25QCs4N8abkV7061Gb0RzxnekamErIrdbo1pHONgzIX8LbBkLSuMEXbK7O1vTJL6cmeBd9DKBjQQFezettm9gkc6u2QdG3Lg2YVrlVuSZVyS6qUnJHL8qArVHp6gux2m0IhU17vxHKXgxsE4wrRwLhCtDC2EA2MqxuXw2FXWlr8cpcBALgBsJQogJhlWZY+//nPz4aCubm5+s53vkMoiCVjWZaGvVNqb/KqvXlIXW3DCoYJ8CJxOGzKL05VUVmaisrTlZoeJ8MwZE4G5Dvbq9H6wTnLhUpSQEFdUpeajF6ZCn89pztew1a+XjovmWd7r1qLzTBUVZKq7ZXZ2rYuUymJ7gXdg2VZGvX2qKe1Tr2t9eprv6xQ0L+gY1+Tml04GwRm5pfJ7mB5UAAAAAAAACAcgkEAMcmyLP31X/+1vv/970uScnJy9PDDD6u4uHiZK8ONzjcdVGfrsNqbvWpv8mps9Nr2yEvLjFdxWbqKytOUV5Qqh+P1pWTMcb+mzvXJf8krhVk2KCRTzerVJXuXAmYw/AVsTrWNp6u5K/Gq8/QcdkPrS9O1vTJLW9dlKTFuYYHc9OSYetsaroSBdZoaH1nQca+JS0xRzpUgMKe4Qp74pGs6HgAAAAAAAIhVBIMAYs5roeD3vvc9SVJ2drYefvhhlZSULHNluBFZlqX+nvErQeCQejpHdC2LeLvcjpkZgVf+lxhmWc7QmE++s33yNw6FDQQtWeowBlRv79JkaFrhJgmalk1t4ylqH09TyIq8b4XLYdPG8gxtr8zS5rWZinNf/a3Ea8uD9rTWqae1TsN9HVc95o3sDpeyi9Yqp6RyZnnQdJYHBQAAAAAAAK4HwSCAmPLWUDArK0sPP/ywSktLl7cw3FAmJ/xqbx6aWSK0ZUjTk4FrOj47P0lFZekqLk9Tdl6ybLbwIVhoZFrTZ/sUaBoKuw2fJUt
9xojqXJ0a8Y9LYbZ/tCypazJZrWPp8pvh3xa4XXbtXJ+rjaVp2lieIbfLPm/9lmVpZLBbva0z+wT2d1xWKHgtXwNDaTmFV2YEVrI8KAAAAAAAALBICAYBxJQvfOELc0LBsrKyZa4Kq10oZKqnY3R2VuBA3/g1HR+f6JpdHrSwNE2eqyzJGRqa0vSZPgVahiP2GTImVBfXpf4prxRhy76+qQQ1j2ZoKuQK276uMEV37inVTVsK5Hba5fVORLze9MSoetrqZ8PA6YnRee/hreISU6/sE1ipbJYHBQAAAAAAAKKCYBBAzPjiF7+o7373u5JeDwXLy8uXuSqsVqPDU2prmpkV2Nk2rIA/zHS8CGx2Q3mFKSouT1dRWZrSsxIWtDRmcHBSvjN9CrRF3pNv3Dat+oRedY73SFPh+wz7PGoczdRYYO6ypGlJbu3dkKv9G/OUkx6v9PQE2e02hUJvXn80FAyov7NJva8tD9rfedX638jhdCmrcN1MGFhapaS0bJYHBQAAAAAAAKKMYBBATPja176m//qv/5IkGYahD3/4w2pqalJTU9O8x61fv175+flLUSJWuIA/pM624ZnlQZuHNDIUIXWLICUtbjYIzC9OlfMqy3G+UbB/QtOnexXsHIvYZ9oe1OXkATWPtMkaD7+J4XjApabRDHl98ZJeD+Ecdpu2VWRq/8Y8rS9ND7t0qWVZGu7vUk9rnXpb69Tf0ahQ6FqXBy2anRWYkV8mu523IQAAAAAAAMBS4hM5ADHhxIkTs48ty9JXv/rVBR33pS99SQ888EC0ysIKZlmWvP0TarsSBHZ3jMgMhQ/cwnG67CooSZ0NA5NT4665hmDPuKbP9CrYHXlp0oDDVEv6sBq8zQoNB8P2mQ461DyWrt6pJL0xECzNTdL+TXnatT5HCZ43L19qmqZGvT3qbepRd2uD2i+d1+R45JmK4cQnpSmnpHJ2r0B3XMI1HQ8AAAAAAABgcREMAgBwxfRUQO3NQzN7BTYPaXI8wuZ8EWTmJKqoPE3FZenKKUiW3W675hosy1Kwe1zTp3sV6ou8p5/lNNSePa6LA5fk6/eF7RMwbWodS1PXRIpMzdSSHO/U7ppc7d+Up8KsxNlrTo4NabC7Vd6eVg32tGqot03BwLXdv8PpVnbRutkwkOVBAQAAAAAAgJWFYBBATHjkkUeWuwSsQKZpqbdrdHZ50L7uyEt1huOJd6qobCYILCxLU3yC67prsSxLwc6xmUBwYDJyR5dNvfl+neqtk68rfHAYsgx1jKeofTxNQcsuu83Q1jUZ2r8xTxvXZMgK+uTtbdeFIy3ydrdpsKdF0xOj11G1ofTc4pkZgSWVysgrZXlQAAAAAAAAYAXj0zsAQMxpvTyourM96mgZkt8XWvBxNpuhnILkmTCwPF2ZOYlve0acZVkKtI3Kd6ZXIW/kfQsNj0O9eUEd6zqnYGv4ANOypJ7JZLWMpctnOlSQlaB9NdmqybXkG+nUYPNZPXOoRaPePkkLXxb1jeKT02f3CcwuqmB5UAAAAAAAAGAVIRgEAMSU8ye79NJTlxbcPynFMxsEFpSkyuVenF+dlmkp0Dqs6TN9MoenI/Yz4p1qSgnoRO9ZOVsiz+obmEpQ01i6PHZL+0v9KkzwKjDareHDHToUClx3nYkp6copKld+aaWSssqUmJrF8qAAAAAAAADAKkUwCACIKU31A/O2Oxw25ZekzoaBKWlxixqEWaalQPPQTCA4Gn5vQEky4xw66fDp7OBFpU+Nyhmh34TfpvFxv7JdnSpyn5UZmJQ6pJ7rqM3pjlN6TrEy8kqUnjvzv4KiPNntNoVCprzeyHseAgAAAAAAAFj5CAYBADElvzhFHS1Db3otPSthNgjMLUyRw2Fb9OtaIVP+xiH5zvbJHPdH7DflsumFqXG1DLQpL2FE6e4I5wv65ZzsVpp/TOmSNC2Z11CPzWZXSlaBMnKLlZ5XqozcEiWlZckwFv/eAQAAAAAAAKwMBIMAgJi
ybU+x4uKdGvZOKS0jXkXl6UpMipC+LQIrZMp/yavpc32yJiIv6Tlsk34+NKIBW5+KEodUkBhhD8BQQI7JPtl8Q7qWeYyJqVlKzy1WRm6p0nOLlZZdKLsj0jxEAAAAAAAAADcigkEAQEwxDEPrt+RH/TpW0JSvflC+832ypoIR+3WHQnp2dEgT9nblJPlUZo8Q95kh2af6ZZ8alKEIoeEV7rgEpeeWKCP39SVB3XEJb+d2AAAAAAAAANwACAYBAFhEViAkX92gfBf6ZU1HDgR7AuM6N3lR40aXUlPSlGJ3S+HmAFqm7FNe2af6ZVihOc12u1OpOYXKeEMQmJCSsaj7IgIAAAAAAAC4MRAMAgCwCEx/SP6L/fJdGJDlnxvgvcYb6FHT1Cn1a1jBhBzZnLnh5/9Zlmy+YTkm+2SYry1Baig5I+dNswFTM/Nls9ujcUsAAAAAAAAAbjAEgwAAvA3mdFC+C/3y1Q1IATNiv0F/pxqnTmnQGlIoIVemqyxiX5t/TPaJHsV73Mooq1ZG3kwImJZTJJc7Lhq3AQAAAAAAACAGEAwCAHAdfCNjGjveKltHQDbLFrFfv79djVOnNGwOKRifLdO9VoqwzKfLMFWUnaGS8l1Kzy1WfFJatMoHAAAAAAAAEIMIBgEAuAozFNLwQKe8Pa0a6ehUfL9H2SqSw3BICh8K9vpb1DR5WiPmkELxWQp51klG+L4J8QnaXrtbJSXl7A0IAAAAAAAAIGoIBgEAeAPLsjQxMqDBnjYNdrfI29Oqob4OuSyPyuI2qcxdIZsRfk8/y7LU429W09QpjYWGFYrLUCiuQrKF7+/xxGnz5u1at65KNlvkWYcAAAAAAAAAsBgIBgEAMc03NSFvT6sGe1rl7Z75//7pidn2eFuSquN2K9+9TrYIM/4sy1S3v0lNU6c0FJzUlDtPjuR8OezhZ/85nU7V1GxWdfVGOZ3OqNwXAAAAAAAAALwVwSAAIKZYlqWupnNqqz8hb3erxkcGwvZLsKeoPG6L8lzlMiIEgqZlqmW6TUcmu9Ue9MhwbVRB6oQSnIGw/W02myoq1mvTpq3yeOIW7Z4AAAAAAAAAYCEIBgEAMeXy6V/rxK9+GLE90Z6mNXFblOMqi7jfX9AydXJqTE+NB+QNJSnZ5VB19pDibMMRz1tWtlZbttQqKSn57d4CAAAAAAAAAFwXgkEAQEzpajwX9vVke6bK47cox1US8diAZemVyWk9Pz6tEdNUenxIt2aPSf7BiMfk5RVo27ZdysjIfNu1AwAAAAAAAMDbQTAIAIgp2UXr1NNaN/s81ZGtNfHblOksiHiMz7R0cHJaL0xMacK0VFOSoFuShzTmbZflt8Iek56eoW3bdik/v3DR7wEAAAAAAAAArgfBIAAgplTtOCBPfLIm2rxKHEpXatATse+UaerXE9N6aWJaCSlu3b67QJmOfrU2n9PoYCjsMYmJSdq6dYdKS9dEXIoUAAAAAAAAAJYDwSAAIKb4JgMKnIlTYSA/Yp8J09SLE9M64vdrY0WW/rBmnUKTnTp39hU1+X1hj3G7Pdq0aZsqKqplt9ujVT4AAAAAAAAAXDeCQQBATGn4RYOKA+GX/xwLmXphYkq9qW7tuqlE91VkqrurRSePP6XJyYmwxzgcDq1fv0nr12+Sy+WKZukAAAAAAAAA8LYQDAIAYsv03CVAR0KmXg0G5FqXrndurlR2Wpw6O9v17DM/1fCwN+xpDMPQunXV2rx5m+Li4qNdNQAAAAAAAAC8bQSDAICY4qzO0NTZfsXZbBoKhdSY6FDutiK9tzxDNpuh/v4+Pf30c+rt7Y54jpKScm3dWqvk5NSlKxwAAAAAAAAA3iaCQQBATKmuLVRfUYoGhqaUW5yqsviZ5T9HR4d18uRRtbY2Rzw2JydP27fvUmZm9lKVCwAAAAAAAACLhmAQABBzsnOSlJ2TJEmanJzUmTPHdelSnSwr/N6Dqanp2r59p/Lzi2QYxlKWCgAAAAAAAAC
LhmAQABCT/H6/Llw4owsXzigYDIbtk5CQqC1balVWtlY2m22JKwQAAAAAAACAxUUwCACIKZZlqaHhok6dOiafbzpsH5fLrU2btqqycr3sdn5VAgAAAAAAALgx8GknACCm1NWd19Gjh8K22e12VVdv1IYNm+VyuZe4MgAAAAAAAACILoJBAEBM6exsn/OaYRhas6ZSW7ZsV3x8wjJUBQAAAAAAAADRRzAIAIgp+fkF6up6PRwsKirV1q07lJqatoxVAQAAAAAAAED0EQwCAGJKVdUGxcXFa3R0RPn5hcrKylnukgAAAAAAAABgSRAMAgBiis1mU1nZ2uUuAwAAAAAAAACWnG25CwAAAAAAAAAAAAAQfQSDAAAAAAAAAAAAQAwgGAQAAAAAAAAAAABiAMEgAAAAAAAAAAAAEAMIBgEAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxACCQQAAAAAAAAAAACAGEAwCAAAAAAAAAAAAMYBgEAAAAAAAAAAAAIgBBIMAAAAAAAAAAABADCAYBAAAAAAAAAAAAGIAwSAAAAAAAAAAAAAQAwgGAQAAAAAAAAAAgBhAMAgAAAAAAAAAAADEAIJBAAAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxgGAQAAAAAAAAAAAAiAEEgwAAAAAAAAAAAEAMIBgEAAAAAAAAAAAAYgDBIAAAAAAAAAAAABADCAYBAAAAAAAAAACAGEAwCAAAAAAAAAAAAMQAgkEAAAAAAAAAAAAgBhAMAgAAAAAAAAAAADGAYBAAAAAAAAAAAACIAQSDAAAAAAAAAAAAQAwgGAQAAAAAAAAAAABiAMEgAAAAAAAAAAAAEAMcy10AAODGYbfz9yY3MofDvtwl4AbF2EI0MK4QDYwrRAtjC9HAuLqx8O9tAMBiMSzLspa7CAAAAAAAAAAAAADRxZ+aAAAAAAAAAAAAADGAYBAAAAAAAAAAAACIAQSDAAAAAAAAAAAAQAwgGAQAAAAAAAAAAABiAMEgAAAAAAAAAAAAEAMIBgEAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxACCQQAAAAAAAAAAACAGEAwCAAAAAAAAAAAAMYBgEAAAAAAAAAAAAIgBBIMAAAAAAAAAAABADCAYBAAAAAAAAAAAAGIAwSAAAAAAAAAAAAAQAwgGAQAAAAAAAAAAgBhAMAgAAAAAAAAAAADEAIJBAAAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxgGAQAAAAAAAAAAAAiAEEgwAAAAAAAAAAAEAMIBgEAAAAAAAAAAAAYgDBIAAAAAAAAAAAABADCAYBAAAAAAAAAACAGEAwCAAAAAAAAAAAAMQAgkEAAAAAAAAAAAAgBhAMAgAAAAAAAAAAADHAsdwFAACAlev8+fM6dOiQzp49q3Pnzqmzs1OS9Nxzz6mwsHDRj0NsuN7x8T//8z965ZVXVF9fr8HBQU1MTCglJUUbN27UBz7wAd12221LdQtYoa53bP3Zn/2ZHnvssYjt73//+/WFL3xh0evF6nA94+rw4cP68Ic/fNVz79q1Sw8//PCi1ovV4e28V2pra9O//du/6dChQxocHFRqaqp27dqlP/iDP9CaNWuWonysQIFAQIcPH9YLL7ygw4cPq729XaFQSLm5udq/f79+93d/VwUFBWGP5b07AACxhWAQAABE9C//8i967rnnluw4xIbrHR/f+ta31N7eroqKCm3btk0ej0ft7e164YUX9MILL+jBBx/UZz/72ShUjNXi7f7s2b9/v7Kysua8vnXr1rdTFla56xlXmZmZuv/++yO2P/PMMxofH9fOnTvfbnlYpa7359WxY8f08Y9/XBMTEyouLtZtt92mzs5OPfHEE3r22Wf19a9/nXEVo44ePaqPfexjkqS8vDzt27dPknTmzBl997vf1U9/+lP953/+Z9jfabx3BwAgthAMAgCAiLZs2aKKigpt2LBBGzdu1AMPPKCBgYGoHYfYcL3j40tf+pIqKiq
UkJDwptePHTum3/u939M3v/lN3XXXXdq8eXO0SscK93Z/9nz84x/Xrl27olghVqPrGVdr1qzRl7/85bBt3d3devzxx2UYhu69995olIxV4HrG1fT0tP7oj/5IExMT+tjHPqY//dM/lc02s0PM448/rs985jP69Kc/raefflrx8fFLcRtYQQzD0J133qmPfvSjbwr/fD6fPv/5z+vHP/6x/uRP/kRPPfWUnE7nm47lvTsAALGFYBAAAET08Y9/fEmPQ2y43vERadZWbW2t7r77bv3oRz/SK6+8QjAYw/jZg2hY7HH1+OOPyzRN7dixQ0VFRYt6bqwe1zOunn76afX396u0tFR/8id/MhsKStK9996r5557Tk899ZQee+wx/c7v/M5ilotVYM+ePdqzZ8+c191utz73uc/pmWeeUWdnp06ePDlnVim/PwEAiC22q3cBAAAAVjaHY+bv3Vwu1zJXAgDz+8lPfiJJuu+++5a1Dqw+586dkyTt2LFDdrt9Tvvu3bslSc8+++yS1oWVz+PxqLS0VJLU19e3vMUAAIBlx4xBAABuQOPj4zp48KAOHz6sCxcuqKWlRWNjY3K73crOztamTZt0zz336KabbpJhGMtdLlaJlTquLl68qF/+8pey2+266aabluy6WDwrZWw988wzeuaZZ+T3+2f3Z9q0aVPUrofoWinj6o1Onz6t5uZmxcXF6a677lqSa2JxLee4mpqakiSlpKSEbU9NTZUkXbhwYVGvi+iL9rgKhULq7OyUNLMHKgAAiG0EgwAA3GC+9a1v6Wtf+5p8Pt+ctmAwqObmZjU3N+vxxx9XbW2t/v7v/175+fnLUClWk5U0rn70ox/p6NGjCgQC6uzs1KlTp+RwOPT5z39e69ati8o1ET0raWw98sgjb3r+0EMP6ZZbbtFXvvKV2Q/csTqspHH1Rq/NFrzjjjuUmJgY9ethcS33uEpPT5ckdXR0hG1/7fXh4WFNTEzM2ZMXK9NSjKvHH39cXq9X6enp2rZt22KVDgAAVimCQQAAbjDNzc2zHyzk5ORo7969qqmpUUZGhnw+n06dOqWf/vSnmpyc1LFjx/ShD31Ijz76qDIyMpa5cqxkK2lcnThxQo899tjs87i4OP35n/+53vOe9yz6tRB9K2FsVVVV6XOf+5x2796tvLw8eb1eHTlyRP/4j/+oF198Ub//+7+v7373u2/azwsr20oYV2/l9/v1i1/8QpJ0//33R+06iJ7lHle7du3Sv//7v+vFF19Uf3+/srKyZtuCwaB+9KMfzT4nGFw9oj2uOjo69Hd/93eSpD/+4z9m2XUAAEAwCADAjcYwDO3fv18PPvig9uzZM+eD7Pvvv18f//jH9bGPfUzNzc3q6OjQP/zDP+hLX/rSMlWM1WAljau//du/1d/+7d9qcnJSra2teuSRR/R//+//1dNPP61//ud/lsfjWfRrInpWwtj6yEc+8qbnBQUFuv/++7V37169+93v1smTJ/XUU0/p7rvvXrRrIrpWwrh6q+eff17Dw8PKzc2d3QsOq8tyj6s9e/Zoy5YtOnXqlB588EF97nOfU3V1tTo7O/XVr351dqlISfwhwyoSzXE1Pj6uP/iDP9Dw8LDuuusuve9974vWbQAAgFXEsCzLWu4iAADA4hkeHl7Qknd1dXW69957Jc3MuHrllVcUFxc37zH79u3TwMCAnnvuORUWFi64pus9DivHShxXb/QXf/EX+uEPf6g/+qM/0ic+8YnrOgeWx0ofW3/3d3+nb37zm3rggQf4A4pVZCWOq0984hP61a9+pf/zf/6PPv3pTy/4OKwcK2Fc9fX16ROf+ITOnTv3ptfdbrf+8i//Uv/3//5fGYahM2fOMDNslYjWuPL5fPrd3/1dHTlyRHv27NF//Md/LHhM8N4dAIAbG39CBgDADWah+2BVVVWprKxMkjQ1NaXW1tYoVoXVbqWPq/vuu0+S9Nxzzy3J9bB4VvrYKi0tlTTzYTxWj5U
2rrxer379619Lev3nFVaflTCusrOz9YMf/ED/9m//pgcffFDvf//79elPf1o///nPtXPnTklSSUkJoeAqEo1xFQgE9KlPfUpHjhzRli1b9K//+q+MCQAAMIulRAEAiGGJiYmzj1/b2wR4u5ZjXKWnp0ua+fAdN67lGFsjIyOSdNXZPli9lmJcPfHEEwoEAtqyZYvKy8ujcg2sLNEcVzabTbfffrtuv/32N73+4x//WJJYqvYGtpBxZZqm/r//7//Tiy++qKqqKv3Hf/yH4uPjl6pEAACwCjBjEACAGOX3+9XS0jL7PD8/f/mKwQ1jucbV4cOHJc3MksCNaTnGlmVZevrppyVJGzZsiPr1sPSWalz95Cc/kcRswVixHD+vQqGQHnnkERmGoQ984ANRvx6W3kLGlWVZ+su//Ev98pe/VFlZmb75zW8qJSVlCasEAACrAcEgAAAx6oknntDY2JgkqaamRllZWctcEW4E0RpX586d0zPPPKNgMDin7fnnn9dDDz0kSXrve9+7KNfDyhOtsXXhwgX97Gc/k9/vf9Pr4+Pj+su//EudPXtW8fHxes973rMo18PKshS/Cy9duqTz58/L5XLpXe9616KfHytPNMdVQ0ODpqam3vTa+Pi4PvvZz+rChQv67d/+bVVXVy/a9bByLGRcffnLX9aPfvQjFRYW6jvf+Y4yMjKWukwAALAKsJQoAAAxyOv16h/+4R9mn3/iE58I2++FF17Qv/7rv84+f21JvU9+8pOz+5Tccsst+sM//MNFOQ6rWzTHVU9Pjz75yU8qOTlZNTU1ysjI0NjYmJqbm9XW1iZJevDBB/nQ/QYVzbHV1dWlP/3TP9UXv/hFbdiwQWlpaRoYGNDFixc1MjKi+Ph4PfTQQ/zxxA0o2r8LX/PYY49Jkm6//XZm7sSAaI+rb37zm3rqqadUU1Oj7OxsjY2N6cSJExofH9ddd92lP//zP1/sW8IKsJBx9eyzz+rb3/62JKmgoEBf+9rXwp7rwIEDOnDgwJte4707AACxhWAQAIAY4/f79alPfUqDg4OSZj4cuOOOO8L29Xq9On369JzXL168OPs43F5J13scVq9oj6uNGzfqk5/8pI4cOaLm5mYdP35cNptN2dnZuvfee/W+971PtbW1i3hHWCmiPbYqKyv1oQ99SGfPnlVDQ4OGh4fldDpVUFCg++67Tx/+8IdVWFi4iHeElWApfhdKM8s7/uxnP5Mk3X///W+3bKxwSzGuDhw4oIGBAdXX1+vUqVNKSEjQ5s2b9d73vld33333It0JVpKFjqvR0dHZx68tsR5OQUHBnGCQ9+4AAMQWw7Isa7mLAAAAS8M0TX3mM5+Z/ZCyuLhYP/zhD5nBgLeFcYVoYWwhGhhXiAbGFaKBcQUAAKKBPQYBAIgRlmXpc5/73OwHC/n5+frWt77FBwt4WxhXiBbGFqKBcYVoYFwhGhhXAAAgWggGAQCIAZZl6fOf/7weffRRSVJubq6+853vsDwe3hbGFaKFsYVoYFwhGhhXiAbGFQAAiCaCQQAAbnCWZemv//qv9f3vf1+SlJOTo4cffljFxcXLXBlWM8YVooWxhWhgXCEaGFeIBsYVAACINoJBAABuYK99sPC9731PkpSdna2HH35YJSUly1wZVjPGFaKFsYVoYFwhGhhXiAbGFQAAWAoEgwAA3KDe+sFCVlaWHn74YZWWli5vYVjVGFeIFsYWooFxhWhgXCEaGFcAAGCpEAwCAHCD+sIXvjDng4WysrJlrgqrHeMK0cLYQjQwrhANjCtEA+MKAAAsFYJBAABuQF/84hf13e9+V9LrHyyUl5cvc1VY7RhXiBbGFqKBcYVoYFwhGhhXAABgKRmWZVnLXQQAAFg8X/va1/Tv//7vkiTDMPTpT396QR8srF+/Xvn5+dEuD6sU4wrRwthCNDCuEA2MK0QD4woAACw1gkEAAG4wH/rQh3TkyJFrPu5LX/qSHnjggShUhBsB4wrRwthCNDC
uEA2MK0QD4woAACw1lhIFAAAAAAAAAAAAYgAzBgEAAAAAAAAAAIAYwIxBAAAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxgGAQAAAAAAAAAAAAiAEEgwAAAAAAAAAAAEAMIBgEAAAAAAAAAAAAYgDBIAAAAAAAAAAAABADCAYBAAAAAAAAAACAGEAwCAAAAAAAAAAAAMQAgkEAAAAAAAAAAAAgBhAMAgAAAAAAAAAAADGAYBAAAAAAAAAAAACIAQSDAAAAAAAAAAAAQAwgGAQAAAAAAAAAAABiAMEgAAAAAAAAAAAAEAMIBgEAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxACCQQAAAAAAAAAAACAGEAwCAAAAAAAAAAAAMcCx3AUAAAAAAFavysrK2cf19fXLWAkAAAAA4GqYMQgAAAAAAAAAAADEAIJBAAAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxgGAQAAAAAAAAAAAAiAEEgwAAAAAAAAAAAEAMIBgEAAAAAAAAAAAAYoBjuQsAAAAAAFyb8fFxPfbYYzp48KAaGho0NDSkQCCg1NRUrVmzRjt27NCdd96pdevWRTyH1+vVD3/4Q7300ktqaWnR8PCwEhISlJeXpz179ug973mP1q5du4R3BQAAAACINsOyLGu5iwAAAAAALMz3vvc9fe1rX9PIyMhV+37961/XzTffPOf1H/7wh/ryl7+ssbGxiMfa7XZ98IMf1Gc/+1nZ7faI/SorK2cf19fXX7UmAAAAAMDyYcYgAAAAAKwSf/M3f6NHHnlk9rndbtfGjRtVUlIit9str9erixcvqrOzU5Lk9/vnnOMb3/iGvvKVr8w+d7lc2rlzp/Ly8jQ6OqrDhw9reHhYoVBI3/nOd9Td3a1/+qd/kmEY0b9BAAAAAEBUEQwCAAAAwCrwve99702h4N13363PfvazysvLm9O3oaFBjz76qDwez5teP3HihL761a/OPr/55pv1pS99SZmZmbOv+f1+PfTQQ/rGN74hSXr66af17W9/Wx/96EcX+5YAAAAAAEuMpUQBAAAAYIUbGRnRbbfdpomJCUnSBz7wAf31X//1NZ/ngx/8oI4ePSpJ2rp1qx5++GG5XK6wfd84OzExMVEvvviiEhMT5/RjKVEAAAAAWD1sy10AAAAAAGB+//M//zMbChYUFOgv/uIvrvkcjY2Ns6GgJP3VX/1VxFBQkj796U8rLS1NkjQ+Pq4nnnjimq8JAAAAAFhZCAYBAAAAYIX79a9/Pfv4ve9977yBXiSvvvrq7OPq6mqtX79+3v7x8fG65557Zp8fPnz4mq8JAAAAAFhZCAYBAAAAYIU7c+bM7ONdu3Zd1zkuXrw4+3jr1q0LOmbbtm2zjy9cuHBd1wUAAAAArBwEgwAAAACwgo2Pj2t6enr2eVFR0XWdx+v1zj7Oz89f0DEFBQWzj4eGhq7rugAAAACAlYNgEAAAAABWsNf2FnxNfHz8dZ1ncnLyms8RFxcXsQ4AAAAAwOpDMAgAAAAAK1hCQsKbnr8x4LsWbwwDF3qOqampiHUAAAAAAFYfgkEAAAAAWMESExPl8Xhmn3d0dFzXedLT02cfd3d3L+iYzs7O2cdpaWnXdV0AAAAAwMpBMAgAAAAAK9ymTZtmH7/66qvXdY7q6urZxydPnlzQMSdOnJh9vH79+uu6LgAAAABg5SAYBAAAAIAV7uabb559/IMf/EB+v/+az7F79+7ZxxcuXFBdXd28/aempvSLX/wi7PEAAAAAgNWJYBAAAAAAVrj3ve99s3sEdnZ26m//9m+v+Rxr1qzRjh07Zp9/8YtfVCAQiNj/oYce0uDgoKSZ5Uzvueeea74mAAAAAGBlIRgEAAAAgBUuJSVFf/qnfzr7/Pvf/77+6I/+SD09PWH7X7p0SX/zN3+jl19++U2vf/rTn5bdbpckHTt2TJ/61Kdmw7/X+P1+ffWrX9W3v/3t2dc++clPKiEhYZHuBgAAAACwXAzLsqzlLgI
AAAAAcHWf//zn9b3vfW/2ud1u18aNG1VaWiq32y2v16sLFy6os7NTkvQv//IvOnDgwJvO8Y1vfENf+cpXZp+7XC7t2rVLeXl5GhkZ0eHDhzU8PDzbfscdd+j//b//J8MwwtZUWVk5+7i+vn4xbhMAAAAAECUEgwAAAACwinznO9/RP/3TP2l8fHzefoZh6D//8z+1f//+OW0/+MEP9OUvf3nec9jtdv3O7/yO/uzP/mx2lmE4BIMAAAAAsHoQDAIAAADAKjM0NKTHHntML7/8si5fvqyhoSFJUlpa2uxegu9617tUWloa8Rxer1c/+MEP9NJLL6mlpUUjIyNKSEhQbm6u9u7dq/e85z1au3btVWshGAQAAACA1YNgEAAAAAAAAAAAAIgBtuUuAAAAAAAAAAAAAED0EQwCAAAAAAAAAAAAMYBgEAAAAAAAAAAAAIgBBIMAAAAAAAAAAABADCAYBAAAAAAAAAAAAGIAwSAAAAAAAAAAAAAQAwgGAQAAAAAAAAAAgBhAMAgAAAAAAAAAAADEAIJBAAAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxgGAQAAAAAAAAAAAAiAEEgwAAAAAAAAAAAEAMIBgEAAAAAAAAAAAAYgDBIAAAAAAAAAAAABADCAYBAAAAAAAAAACAGEAwCAAAAAAAAAAAAMQAgkEAAAAAAAAAAAAgBhAMAgAAAAAAAAAAADGAYBAAAAAAAAAAAACIAQSDAAAAAAAAAAAAQAwgGAQAAAAAAAAAAABiAMEgAAAAAAAAAAAAEAMIBgEAAAAAAAAAAIAYQDAIAAAAAAAAAAAAxACCQQAAAAAAAAAAACAGEAwCAAAAAAAAAAAAMYBgEAAAAAAAAAAAAIgBjmid2DRNjY+Pa3R0VH6/X6FQKFqXAgAAAAAAAAAAAFY1u90ul8ul5ORkJSYmymZb/Pl9hmVZ1mKfdGxsTJ2dnbIs68r/FvsKAAAAAAAAAAAAwI3FMCTDMGQYhgoKCpSUlLS451/sYPC1UNA0TZnmTChoGEZUUk0AAAAAAAAAAADgRmCapizLkmFINttMtrbY4eCiBoOmaaqhoUGmaSoUsuTxxCkxMVlut0eGYSzWZQAAAAAAAAAAAIAbimVZ8vmmNT4+qunpKdntM+FgRUXFok3AW9RpfOPj47IsS6Y5EwpmZGTL44kjFAQAAAAAAAAAAADmYRjGm/K1mZU5LY2Pjy/aNRY1GBwdHZ3dUzAxMZlAEAAAAAAAAAAAALgGhmEoMTFZljUzi3B0dHTRzr2owaDf75/dU9Dt9izmqQEAAAAAAAAAAICY8No2fZY1k78tlkUNBkOh0MxJbTZmCwIAAAAAAAAAAADXwTCM2X0FX8vfFsOiBoMAAAAAAAAAAAAAViaCQQAAAAAAAAAAACAGEAwCAAAAAAAAAAAAMYBgEAAAAAAAAAAAAIgBBIMAAAAAAAAAAABADCAYBAAAAAAAAAAAAGIAwWAMOH78mHbv3qbdu7fp61//9+Uu52157T4+8YnfW+5Slsx99/2Gdu/epvvu+43lLuWqbqSxFs4TT/x09v6eeOKny10OsGLd6D8LrsVCvhar6ed8tHziE783+3V6uxh/ALAw/Ly8cd0I31veQ0Uf/74Fls+N8HM6Vi3kZye/n96+WMwAlppjuQvA6vDEEz/V3/zN56/pmL/8y8/rnnveHZ2CVqgvfOFz+sUvfhax3el0KjExSSUlJdq2rVb33HOv8vPzl7DC1SUQCOjF/7+9+w5r6mzjAPwLQfZGEBDFheJmOerWulfds1pbt9Zq1TpRceunVlvrHq0bR92i4kQRF1MUGYIsGbJDgDCSfH+kOSZmAkFUnvu6ejXknJzzZnie867n9b0HX997iIh4jaysLPB4hdDV1YWFhQVq17aHo2MTtGrVGm5u7jA0NKrqIn9WoqIi4et7DwDQtWt3NG7cpIpLRIj6lMUdLS0tGBgYoGZNKzRu7ITu3Xugc+eu0Nam2xpVeLx
CREdH4fXr14iIeI2IiHDEx8eBz+cDKFvsFgqFSExMwOvX4f8d6zUiIyNQUJAPAHBxccOePQcq7b1UxNWrl5GSkgwAmDp1RqWfb+bMqQgODizz6zZv3oauXbtXQokAX997iIqKBACMGTMexsbGlXIeTUhOTsawYQMVbjcwMIS5uTkcHRujS5du+PbbXtDV1f2EJSSk+gkJCcaMGZMBAE5OTfHPPyeU7v/+fRoGD+7H/D1p0mTMmDFb6WsuXbqAjRvXAgB69eqDtWs3VrDUn5cDB/bi0KH9crcZGBjA0NAQRkbGqF+/AZo0cYKLixtatWr9iUupmJfXCeTl5cHY2Bhjxoyv6uJ8dsQN/ba2dtWuXYQQUrkoBlecshisyK5d++Hm5q6R81N7HfkcUAsaIZ9QSUkJsrOzkJ2dhZCQYBw/fgTTp8/G+PETqrpon51Xr15izZqViI+Pk9lWUFCAgoICJCUl4enTJwAAc3MLXL9++xOX8vMWFRXJ3OjY2trRjQb5aggEAnC5XHC5XMTFvYWPz3U0buyEDRs2w96+TlUX77M2eHB/cDi5GjnWn39ux6lTxzVyrE/t2rUrTEfdp+gY/Bz5+t5nBjMNGDD4s+4YVKWgIB8FBfl49y4J9+/fxeHDB7Bu3UY4OTWr6qIR8tVq3rwF9PT0wOPxEB0dBS43D0ZGiq8jgYHPpf4OClI9WCIwMIB5rKmGuC+FuL6Tnp6Ot29jcfeuqJ7j4FAPY8aMw5Ahw8Fisaq0jF5eJ5GamgIbG1vqGJRDXA9zcXGjjkFCiEZRDP7yUXsd+RxQxyApMzc3d4waNVblfk2aOGn83E+eBGn8mJVl5MgxcHdvI/VccXExkpPfwdf3HsLDX6G4uBg7d26HoaEhhgwZVkUl/fxERIRjzpwZKCgoAADUrFkT3bp9i0aNHGFiYoKiIh7ev3+PiIjXCAh4hry8PAgEgiouNSGksnwcdwQCAXJzcxEWForbt31QVFSEqKgI/PLLLBw/fhoGBgZVWNqyu3jx2ic7l0DAl/rbxsYGJSWlyMzMqPCxDAwMUatWLbx9G1uhMlaUm5v7Z32/MG3aLDRs2FCtfZs3b1HJpfnymJubY8kSD6nn8vLy8OrVS9y86f3fwKFE/PLLbPz99zHUrm1fRSUl5OtWo0YNtGrljGfPnoDP5yM4OAidO3dVuP/HjZDh4S/B4xVCT09f4WskZ1q7urZRuN/XoGfP3ujVqw/zd0lJCbjcPGRkZCA8/BVCQoJRUJCP+Pg4bN68Abdv+2DNmo2wtLSUe7zPPRaq42t4D4QQUhkoBmvWxzFYEXEdbuDAwTTgg3wVqGOQlFmtWraVltbqa9KkiZPCz+mHH37C3r278M8/hwCIprAPHjwEWlq07CcAbNy4jukU7N9/EBYvXqYwJVhpaSmeP3+KO3dufcoiEkI+IUVxZ8iQYfj++x8wa9ZUZGdnIzn5Hc6e9cIPP/xUBaX8MnTp0g116zrAyakpnJyawszMXGUabEXq12+AMWPGo2nTZnByaoq6dR0QFBSI2bOnVULJvx6tWzvTqNsK0NXVk3s9GDhwMCZOnIRZs6YhJSUZHE4u9u7d9dWlPSLkc+Lq6oZnz0TZO4KCAtVqlOzQoRP8/f1QWlqK0NBQtGvXXu7+CQkJSE9/DwCwsrJG3bp1NVz6z4uDQz2ldez8fC4uXjyP/fv3oqiIh8DAACxcOBd79hxQ2rBLCCHk60QxWHNUxWBCvlbUC0FIFZkyZTpMTc0AAJmZGXJTZlZHb9/GIjIyAgBQq5YNli3zULpOkLa2Nr75piM8PDw/UQkJIZ+T+vUbYNKkKczfDx7cr7rCfAFWrlyDSZMmo337DjAzM6/QsYYMGY558xagT59+cHCoV+UpzQixtbXDb78tZf5++NAXxcXFVVgiQr5ukoMcJFOOfSwtLRXv3iUBAPr3HwgbG1sAQFDQc4WvCQqiFGaSDA2NMH78RBw8+A9MTEwAAK9fh2Pnzh1VWzB
CCCFVgmIwIaSiaMYgYWRlZWH+/DmIiHgNAPjuu6FYtGgZ2Gx2hY8dGRmBx48f4cWLULx9G4usrCwIBHyYmJiiYcNG6NixEwYNGqIy/Vv79q4ARHn69+w5IHefzMwMXLjwL54+fYKEhHhwuVzo6enBzMwM5ubmaNzYCZ06dUb79h3kztBLSkrEhQv/IigoAElJiSgoKIShoQFMTExRs2ZNODk1Q9eu3eDi4lahz0RbWxt169ZFWFgOACA/P79CxwOAgIBn8Pa+hhcvgpGZmQmhUIiaNWuiVStn9Os3AG3atFP7WP7+j3D37i2Ehb1AZmYGCgt5MDY2goNDPTg7u+Dbb3uXOwe2st9aXNxbZr8WLVpCW7tGuc4hT1FREby9r8DP7wGio6ORk5ONGjV0UKtWLbi5tcHIkaNRt66Dxs738KEv7t69jbCwUGRlZYHPF8DCwgKtWrXGgAGD0batet9HcXExrl+/Bn9/P0RGRiAnJxt8Ph/m5hZo2LAR3N3boHfvfrCysgIAXL16GevWeUodY906T5nngC8rPS/5+mgq7ri6frgeJyYmKNxPKBTixYtQPH36GGFhLxAX9xa5uTlgsVgwMTH9b6Z3N/TtOwA1aqh37XnyxB8XLvyLly/DkJfHgbm5BZycmmLYsBFo1+4btY4xZMgAZo2ej9OKnjhxDDt3bgcAuLi44q+/9in8fBIS4jFp0ngUFBRAT08Pf/99HPXrN1CrDJ8bgUAAb+8ruH79Gt6+fQsuNw8WFhZwdW2DUaNGK107LjAwgJm1OHnyNKk1BGfOnCqVFgf4cG8hqX//QVi5crXUc9HRUbh06TxCQoKRkpICHo8HIyMjmJmZwcrKCs2bt0D37j3h5NS0Im9dqYUL58HP7wEAUfaBmTN/Vrivj88NrFy5DICoA/3vv49DT09P7gzRYcMGyrz+43styc/uyZMgCAQC3LjhDR+f64iJifkvzpVKxZXS0lIEBgbg2bMnCA9/iYSEBOTm5kBbWxtmZuZo1qw5evbsjW7demgsa0Lbtu2gq6uLoqIi8Hg8JCUlokEDUdqf/Px8+Pv7ITDwOSIjI/DuXRLy8wugr6+HmjWt0KqVM777bqhaKVy/lPtNQjRBUbxu2rQ5DAwMUFBQgDdvopCXlyd3vVLJBkZXV3e4urrB2/uq0jWOJLd93CiZk5MNP78HCAoKRHR0FFJSklFYyIOhoQFq1bKBi4sbhg4drjIGlvW6lpyczFwvxXHi/fv3OHfuNPz8HiA1NRUAYG9vj65du2PMmHEwNDRSWoaycnRsDA8PTyxaNB8AcPnyRUyc+CNq1bKR2k9ZLJRUntgmvm8RS01NkRtLPz7vx3X5vLw8XLjwL3x97yI5+R1ycnLg7OzKxB5134OkoqIiXLz4L27duomkpEQUFhbCysoKbdt+gzFjxiud9SIZH8+fvwo7OzuF+0rWtzw8PKVSyn38WQQHB8r9fD5+nRifz8etWzfx4MF9hIe/Qk5ONlgsFqysrOHs7IqhQ4ejaVP11tC9desmrl69hKioSOTn58PSsiacnV0wfPgotGjRUq1jEEKqFsXgzycGiymLAWV5rVhZ2+syMzNw8eJ5PH36BElJieBwcmFoaIS6devim286YcSIUcwgInnKU68Ti4qKxNWrlxEUFID379NQWFgIU1NTNG7shO7de6Bfv4HQ1lbd3fT2bSy8vE7g2bOnyMzMgKGhEerVq4c+ffpj0KDvNNIXQVSjjkECQNQ4MW/ebCQliUaRTJ48FVOnztTIsQ8d2o8DB/bK3ZaZmYHMzAw8e/YEx48fwaZN2yq0no6//yN4eCxBQYF0J1t+Phf5+Vy8e5eEly/DcP78Wdy4cUdmtsSVKxexZcsmmdHlHA4HHA4HSUmJCAkJxqVL53Hv3qNyl1MsJyeHeWxjY6N4RxV4vEJ4eq7A/ft3ZbYlJSUhKSkJ3t5X0a1bD3h6rlWabiY1NQUrVixFWNgLueXNyQlBaGgIjhz5G7d
v+ypd4FgeVb81Pv/DmlVZWVllOrYyQUGBWLVqOZMOQay4uBixsVzExsbg/PmzmDZtZoXTEKalpcLDY4nczzAlJRkpKcm4efM6unf/FqtWrVH6fQQGPoen5wqZcgPA+/dpeP8+DY8fP8LNm9dx9OipCpWbkE9Jk3FHR0eHeVxUVKRwv/XrV+Pq1ctyt6Wnv0d6+nv4+T3A8eNHsWXLDqWNRwKBAJs2rcflyxeknk9LS0VaWip8fe9h9Oix6NKlYilJxo37Hs+fP8WTJ/4IDg7C338fxJQp02X2KykpgYfHUiYN8/z5i77YTsH8fC4WLZovM/I1NTUV3t5XcPOmN6ZOnYFJkyZ/sjIdPnwABw/uk1nPNjc3B7m5OYiPj0NAwHM8fPgAJ0+erbRyrFjhie+/H4P09Pc4duwftG3bDm5usmt+JCUlYtOm9QAAXV1drFu3CXp6ehorB4fDweLF8xEcrHxwydy5s+SOYC4tLUVqagpSU1Nw9+5ttGzZGps2bVW4VlZZaGtrw8TEBOnp6QAALpcLQPRvpH//nnKvEVwuF1wuF3Fxb3H58gUMHTocCxYsVlip/dLuNwmpCFXxulUrZzx54g+BQIDg4EB06dJN5hjiBsZ69erDwsICLi6u8Pa+ivDwcBQWFkJfX/ZeWHIQh+R17t27JIwaNQx8fqnMa8T/hqKjo3D2rBemTp2Bn36aqtb7VPe6JikwMADLli1Cbm6O1PNRUZGIiorExYv/YuvWP9CkiZPax1RHly7d4OTUFBERr1FSUoLr16+VKyZWZWyLjIzA4sXzmYZcTXj/Pg0LFsxFdHSU1POi+vBZXLt2GYsWLcOAAYM0dk5Ni4l5g2XLFsnNJpSQEI+EhHhcvnwBI0aMxq+/LlTYcMrj8bB8+SI8euQn9by4LurjcwOzZs1hMhgRQj5PFIMVq6oYXNVOnz6FPXt2gsfjST2fm5uDsLAchIW9gJfXcaxZswHt23dQeTx1P/vi4mJs2bIRV69ehlAolNqWkZGBjAw/+Pv74dSpE9iyZbvSdd4vXbqArVs3oaSkROL4WcjOzkJwcBBu3vTGli3bVZadVBx1DBK8fh2O+fN/QXZ2FthsNhYuXIyhQ0do7Pg8Hg9sNhvNmjVHq1atUaeOA4yNjSEQCJCSkgw/v4d48SIE6enp+PXXOTh27JTMiEd1pKenw8NjMdMo6uLiho4dO8HS0hI1auggJycHsbExCAh4hoSEeJnXR0ZGYNOm9eDz+WCz2WjX7hu0bdsO5uYW0NLSQlZWFt68icKzZ0/B4eRW+HMJDHzOzGxp0KAhata0Ktdx+Hw+fv31FyZwGxgYoH//QWjWrDlYLBbCw1/h2rXLKCgowP37dzF/Pgc7d+6RW4l49y4JU6ZMQna2qEPOxMQUvXr1hpNTUxgaGoHDyUV0dBQePfJDamoKPooFKqnzW7O3r8M8DgsLxatXLyvUWQyIGvAWLfoVpaWl0NLSQvv2HdCmTVtYWVmjuLgYr1+H4/r1q+Byudiz5y8AKHfnYFpaKiZPnoiMjAwAQOPGohlI9vZ1oKWlhfj4OFy/fg3v3iXh3r07KCwsxPbtO+Wm4PP1vYdlyxYzN1116zqgR4+ecHBwQI0aOsjMzMCrV6/g7/9QKjC7u7fB5s3bEBDwHGfPegEARo4cA3f3r3vBaPLl0HTciY2NYR4rix88Hg81atRA69bOaN68Bezt68DQ0AjFxcVISkrE/ft38eZNNOLj4zB//s84cuSkwlGGO3ZsYzoF2Ww2+vTpB1dXN9SooYOoqEhcuXIJp0+fQlpaWrnfFwCwWCysXLkGEyaMQWZmBv7++yDc3dvA2Vl65PnOnTsQFSVKw9yzZ28MHjykQuetSmvXeiIwMAD29nXQv/8g1KlTB1xuHvz8HuLRo4fg8/nYu3cXDAwMMWrUmDIde/r0WcjNzcG+fbuZ383mzdtk9pP8HT144Iv9+/cAEHWyderUBa1bu8Dc3BwCgQAZGRmIior
As2dPK/Cu1WNqagZPz3WYM2cGBAIBPD09cOyYl1THU2lpCVasWMp0Ws2duwANGzZito8aNQZdu3bDmTOnmE67JUuWw9zcQuZciqxatRzBwUFo0KAhevXqA3v7OsjPz5eZjcnj8aCvrw9XVzc4OTWFrW1tGBgYgscrRFzcW9y9ewtJSUkICwvFkiULsGfPQbVGmCpTWloKDieP+dvISPRvWCAQoKioCBYWlnB3bwtHR0fUrGkFXV095OVxEBERjjt3boHD4eDChX9hYGCIOXPmyRz/S7vfJKQi1InXrq7uePLEH4BoVoKyRknxDFjx//n8UoSGBss0XEmubWRjYws7u9rMtpKSEvD5pbCxsYG7e1s0bNgIFhaWqFGjBrKzs/HqVRju3LmNoiIe9u/fAxMTU4wYMUrle1X3uiaWlpaCpUt/A4eTi44dO6NTp84wMjJGYmIivL2vICkpEenp6ZgzZyaOHDkBW1vFs8/Ko0+ffszskaCgwDJ3DFYkti1d6gEej4dNm9YhOzsb5ubmWLLEQ2Y/B4d6cs/N4eRi0aL5SEtLRdu27dGpUxdYWFggMzOTqYeWFZ9fimXLFiE6OgqNGzdBnz79UKuWDbKysnDv3m0EBwehqKgI69evhrGxsdzfqaaI7ysWL14AQFTXnz59lsx+HzdWR0ZGYObMqUz8dnZ2QYcOnWFrawuBQIA3b6Jx7doVZGVl4ty50ygtLZH7uQPAypXLmE5BXV09DBo0mKlTv3wZhqtXr2Dnzh20phYhnzGKwZ9vDK6IirTX7d27C//8cwgAoK+vj+7de6Jly5YwNTUDh5OL58+f4f79u+BwOFiwYB527dor027wMXU++9LSUsyb9zMz+9TKygo9e/ZBo0aO0NPTw/v3abh//y5CQ0MQGxuDGTOm4OjRUzA3l1265N69O9i0aR3Thunu3gbdun0LU1NTJCe/g7f3VQQHB2HdutUyryWaV7BvOYYAAHMfSURBVKUdgwKBENzCEtU7fmWM9GtAS+vzWIfnyRN/LFu2CAUFBdDV1cXatRs1fpPcvfu3GDVqLJPm8GMTJ/6I69evYe3aVeBwcnHw4H4sX76yzOfx8bkuNVNCWWPhy5dh0NeXTlt65cpFZrbapk1bFS7cKxQKERJSvvSLxcXFSElJhq/vfeZiXqNGDcydO79cxwOAU6eOMxdsW1s77Nq1Typw9+s3AGPHjsesWdOQmpqCoKAAnDx5DBMmTJI6jkAgwNKlvzGVsc6du2LVqjVyZwT+9hvg7+8nNUtHFXV/a02aOKFevfqIi3uL0tJSzJkzA0OHjkD37j3g5NS0zKlFMzLS4enpgdLSUpibW2DLlt/RokUrqX369x+ICRMm4ddff0ZMzBvs378HXbt2R7169ct0LqFQCA+PJcjIyACbzcZvvy3FkCHDZPabOPFHrF27Crdu3cSTJ/64fPkivvtuqNQ+KSnJWL16JdMpKJ4dI69Dt6ioCAEBz5i/bWxsYWNji7y8Dw2johSJVPEDAIFQgPySgqouxidnWMMAWqyqX1pY03GnuLgYx44dYf5u3dpZ4b7Dh4/EokXLFKbVmDx5Go4d+we7d+9EUlISTp/2wk8/TZHZLzQ0hLmJ19fXx/btf8HZ2YXZ3qdPP4wdOx6zZ0+XO5O7rCwsLLBq1VrMnTsLfD4fq1aJOoPE7+PhQ1+cOSOaMWxnVxtLliyv8Dmr0v37d9G1a3esWbNBao3ZoUNHwMfnBlavXgE+n49du/5Ep06dpWKeKuLvycvrJPOcqmvjpUv/AgDYbG3s23dYYapQPp+Ply/D1C5Lebm5uWPSpMk4fPgA0tPTsXatJ7Zt+4PZvnv3X3j9OhwA0K1bDwwbJt2A4OTUFE5OTeHre595rm3bb5SmTPvY48ePMGLEKPz6629ScenjmDd9+my0atVK4cz4adNmYseOrTh37gzCwl7g1q2b6NdvgNrlkOf586coKhKNoNXV1WMGHGlra+P333eifftv5Kb
2/O67oZgxYzYWLpz330jbExgxYpRMQ8KXcL/5tRAIhCiqhnVF3c+krqhuvFa1xpHk2kbi1N/29nVgZWWN9PT3CAwMkGmUVLa2kbm5BfbuPSQVdyUNHz4SU6bMwLx5s5GQEI+9e/9C//4DVS5Zoe51TfK9stlsrFmzAb1795XaNn78BKxcuQy+vvfA4eRi8+b12LFjl9Lzl1XLlh/qM69fvyrz6ysS28Sp0rdv3wpAdK0tSz0jJuYN2Gw2PD3XoW/f/mUuuzzp6elIT0+XO5Nu1KgxOHbsH+za9ScEAgE2blwHNzf3Sksx9/FnYWpqpvLz4fEK//v3lg89PT2sXbtRJj706dMPP/zwIxYvXoDAwABcvHgePXr0klmewsfnBrPmtqVlTezatU+qXtuv30CMGDEas2dPh6/vvQq8U/I1EwqEEBbJzgr72rF0tcGiGCz3+ADFYE0ob3vd48ePcOTIYQCiZZc2bNgCa2trqX2GDBmO0NAQ/PrrHBQU5GP16pU4e/ai0oGX6nz2+/fvYX4X3303FPPnL5KqpwPA2LHf4/TpU9i+fQvS099jx46tWL16vdQ+XG4e/ve/DUyn4Lx5CzBmzHipfcaNmwBPTw/cuXNL6edBNKPKOgafhafh6M1IcPKLVe/8lTEx1MHEPk3QtlmtKi3H9etXsX79GpSWlsLExAT/+992hRd3Sd7eV2TWpvmY5D/uZs2aqzxmv34D8PTpY9y44Y1bt25g8eKlZe4ASkpKZB4PHvyd0n3l5dMXv97c3FxhIw0gmr2hznovinJEi+no6KBjx0748ccpMh1V6iotLcGpUyeYcq1du1FuA6mdXW2sW7cRU6f+CKFQiFOnTmDMmPFSa2jduXMLUVGRAESz3DZs+J/SNbY6dOikdjnL8ltjsVjw8FiFn3+eAR6Ph4KCApw4cRQnThxFjRo10KiRI5ycmqJly9Zwd28rEwg/dvz4UWbE/caN/1P4WVtbW2P9+s0YP34U+Hw+Tp8+hcWLl6n9HgHg4cMHTPrQyZOnK7yJ0dHRwcqVa/DyZRhSUpJx8uQxmY7Bo0f/YUaLDh8+EpMnT1N4Xl1dXXTs2LlMZa2uAlNDcSriAvKKuVVdlE/OWMcIY52Gws2mdZWVobxx52MCgQAcTi5evHiBf/45hPDwlwBEDf/jx09U+DpV124Wi4WJE3/Eo0cPERoaAm/vK3I7Bk+ePMbczM6e/Yvc91CzphXWrduESZPGS6VILq+2bdthwoRJOHr0b6SlpWL9+tXYvHkb3r9/z4ymY7O1sWbNhjKneP7c1KplA0/PdTKVDQDo3bsvwsNfwcvrBIqKeDhzxgvz5i2o1PKIU/c0adJE6fqBbDZbace0mHi9JFXkrTkpNnnyNAQGPkdoaAgePXqI06dPYvTocXj8+BFOnTr+3+ttsGxZ2QdaqaNJEyfMn79I5bqAqtbS1dbWxrx5C/HokR9SUpLh7X21Qh2DaWmp2LZtM/N3585dmEFMbDYbHTp0VPp6U1MzrFy5BiNHDgGfz8eNG9748Ufpa8Dndr/5tYoOfw/fm1EozK9+HYP6hjXQtU9jODZTfo9bmcoSr8WZRfLzuXjzJhq5ubkwNTVltkuvbfThN+3i4gofnxtS2z+8Rn4KMwAwNTVVee9gZ2eH335bgjlzZoLL5eLBg/sqO6DUva5JGjVqrEyDJCC6N/f0XIcxY4YjLS0VT548RnR0FBwdG6t9bFVsbGyZx3l5eSgtLSlT/VnTsa2sRowYpbFOQbGmTZth/vzf5H6HEyZMwsuXYfD1vYfs7CxcvXoFo0eP1ej5K+LSpYtM4/3ixcsVxgcjI2Ns2PA/DBs2GPn5XJw8eUwm1h4/fpR5vGzZCrmDXevXb4Dly1diwYK5GnwX5GtR9DYbXP8kCHnVsGNQTxtGHeyhW192ptOnQjFYPZqMwYcO7cehQ/uVnm/z5m1VOth+795dEAqFMDc3x7Ztf0p9z5Jat3bGL7/8ik2b1iElJRn
37t1Br159FB5X1WeflZWF06dFg2rbtGmHpUtXKDzW6NFj8erVS/j4XMft2z6YPXuuVJvttWtXkZ2dDQDo0aOnTKcgIJo8s2KFJ169CtNounEiX5VNHTh87XW17BQEAE5+MQ5fe12lZTh69B+sWbMKpaWlsLauhX37DpercVaTxBUOHo+HN2+iy/x6ydHo4rQq5Xl9bm4ukpOTy/z6smKz2dDV1atQyqwXL14gM1OUstLV1U3pAuItWrRiRvtkZWXixYsQqe03bnxofJw6dbrSTsGyKM9vrUWLVjh8+JjM6KSSkhK8fh2OCxf+xZo1KzFkSH/MmTMToaEhco8jFApx/brofbVs2UrlFPp69eozHdlPnz5W8x1+IO4w19HRwejRytPb1ahRA717i4JzfHwcUlNTmG18Ph8+PjeYY02ZMqPMZSHyHQs/Wy07BQEgr5iLY+GVt/aZKhWJO97eV9C+vSvzX4cO7ujb91ssWvQr0ymoo6ODDRu2lHmmrzytW4vKlZSUKLNmQXFxMfz9RamZjIyMMHjw0I9fznB0bIy2bdtXuDxi06bNZK7zvr73cOaMFzw9PZgyTp8+U2kc+FIMHz5K7loXYuPGTWAqLvfu3an08ojX5ktKSpIa2VmV2Gw2Vq9ez8wa/euvP+Dv74c1a1ZBKBT+NxNjvdJF5ytixIhRZaq4K6Otrc38bsPDX8msWfGxoiIefH3vSf137doVbNmyEePGjWIau42NjTFjxuwyl6dOnbqwtKwJAHJngH5p95tfqrvXIqtlpyAAFOaX4O61yCo7f1njNZvNhrOzMwDRvffHab/EMxgcHOox/7YAUaMkAERERCA/X3q9zuBg+Q2ZZSGO5YD8f8sfK+t1TUtLC+PGTVC4XV9fH8OHj2T+1nS8MjaWvr7n5nLK9Pqqjm2jRmm+U07y/kCeCRN+YB7fu3db4+evCHE90srKGn369FO6r6mpGTp2FA3SDQ4OlFqvNjk5mUkt7+BQT+ng0Y4dO2vkvpl8fbh+idWyUxAAhLxScP0SVe9YSSgGq6eqY/Cn9uZNNCIjRdf2wYOHKOwUFOvTpx/YbFFbs6q2TVWf/e3bPswa7d9/r3gQttiAAQMBiNo1JTObAZDKpjRu3PcKj6Gnp4/hw1WnoCUVR2sMVjNCoRC//76FSTnWoEFD7NjxF6yt1Z+96ObmrvJGXnItG/F5Hz9+hLt3byMi4jXev09Dfn6B3AVrAeD9+/dwcmqmdpkAoF279swo+SVLFmLChEno0aOn2rmk27Vrj/v370IgEGDWrKn44Ycf0aVLd1haWpapHGLyckTz+XxkZWUhLCwU9+/fxd27t+Hrex8LFy4q1/par159CLDilC7KtGv3DQICngMQBWfJ0T8hISEARKNr1FmgVpWK/tYaNGiIXbv2IzY2Bvfu3UFISDBev34FLvdDp45AIMDz508REPAMU6fOlJnZ8/ZtLNNgbmxsolaqFHFATE5+h6KiIrkzVhQRp/yysLBgPmdlJNdAevs2lhn5++ZNNPLzRe+zZctWcvNyE/Kl0ETcUaVLl25YuHCxWscsLS3F/ft38eDBfURFRSIjIx0FBQUQCARy93///r3UWmvR0VHMItmtWjmrTKns7t4Wjx8/Uv/NKKGtrY01azZi4sQx4HK5+P33/zHb2rRpJ5Mi+kvVtm1bpdutra1Rr159xMbGIC0tFZmZGVIVXU1r1+4bREZGgMPJxYwZk/H99z+gU6cuMDYu38zMadNmoWHDhir3EzfaKmJjY4ulS1dg6dLfUFJSgvnzf2G2TZ48rVIHfLVurXygjSQerxC3b/vg4cMHiIl5g6ysLBQWFsjtAMzP5yI/n6t01mt2djazbpMidna1sXbtRql1i8XS09Nx/fo1BAQ8w9u3scjL44DH48k9jnh9FUmf2/0mIZpSkXjt6urOrGcWFBSIbt16MNvEMw8+blyUXuMohJnRm5AQj/T0dABA7dr2UjPjJCUmJuD69WsIDg5
CQkI8uFwuk0b4Y/L+LX+sLNc1QDTjStFSGWJt2rQDsBMAmIFMmvLxNVTeeuXKaDq2lYWVlRVq17bX+HFVzVJv3rwlDAwMUVCQj4iI1xAIBBob5FIR+flcREdHAQBq1qyJhw99Vb5G3BlYVFSE5OR3TAef5O9MnfWq3N3bIi7ubXmKTQjRIIrBVRuDe/bsrXRWHQBmndaqILm8AJ8vUKtt08BAH3l5eXj7Nlbpfqo+e8lzZ2Vlqjy35PcteW6hUIiIiPD/ymaAZs2Uf55t2ihvEyCaUWUdgz8NaFrtU4lWhTNnTjEdK61aOWPr1h1lHk1eq5ZtmaZPZ2ZmYOnSRTIz1JQRd4qURfv2HdC//0B4e19FTk4Odu7cgZ07d8DOrjZatGgFFxcXdOjQCbVq2ch9/aBB3+HOndsICHiG1NQUbN68AZs3b4CDQz20atUazs6u6NixE8zM1OukUZYjesSIUYiPj8Ps2dOQkZGBLVs2oUGDRmVO0yKeLQiIRrirUreuA/M4I+PDa/Pz85nPvHZte43MFtTEbw0Q3RA1aCBqQBUKhXj3LgkvX4bB3/8R7t27jZKSEgiFQuzfvxu1a9eWGmGZkvJhJL6/vx8zy0ddHA5H5c2GWGFhIXJycgAAqampKhst5Z1L7P37NOZx/foNynQcotyEZiOrfSrRT00T14KPB6RwOLmIi4vDtWuXkZOTg0ePHqJ1a2elaUQB0ezcJUsWqrw5lvRxPMrISGcey+t0+FidOqr3KQs7OzssWeIBD48lzHPm5hbw9Fxb5kbBz5U68axOnbqIjY0BIOroqcyOwYkTJ+HRo4eIiXmDmJg3WL16BbS0tNCokeN/9xeu6NCho9prFbVu7SwzI768unf/FsOGjcT58x9mA7u6itYgrEyq0niLvXgRihUrliItTf00MPn5+WVOh6uvrw8zM3M0btwEnTt3Qc+efeR2rF64cA5//PG7wo5AeWX52Od2v/m16jGgSbVPJfqpVSReS69x9GFwXGpqCpKT3wEQXZsk1atXHxYWlsjKykRg4HOmUVI67Zn8a+WBA3vxzz+HFQ4y/Zg6dUt1r2tiZb0HEDe0asrHs/zKem+l6dhWFlZWml9SxcTERGoglzwsFgv29vaIiooEj8dDXl6eylkXn0JaWhozQO316/AK1SOr+j6VfB2MOtWp9qlEPzWKwVUbgx0c6lVpmlBVJNs2jx8/UqbXSsYIeVR99pLnXr26bMtUiJd1AgAul4vCwkIAokGcqgbmqPMdk4qrso7Bts1qwd3JGtxquKC8URUuKC+5zhGPx4NQKH+GhKaUlpZi3ryfmRFwJiYm6NSpCxo2bAQLC0vo6uqBzRZdDAICnuPsWa//ylm+cq1YsRpubm1w6tRxJh1pcvI7JCe/g4/PdbBYLHzzTUfMnTsfDg71pF6rrV0DO3bsxLlzZ3Du3GkmFVV8fBzi4+Nw5colsNna+PbbXvjll3moWVO9DiNFHBzqYcaMn7FunScEAgEOH96PP/7YXaZj5OcXMI+VpV37sM+HBX/F69eJjpMvsY/q46ijMn5rospcHdjb10Hfvv2RnDwL8+b9jISEeACiGxTJjsGKpsURzwpShybPJf19KF+kmZSNm01ruNRqifySAtU7f2UMaxhAi/XpR0Vr4lqgaEDKTz9NxYIFvyAkJBg7d+6AmZk5BgwYJPcYXG4efv55OlMpsLKyQocOnZhKkY6ODnNzeuvWTdy+7fNf+aXLW1Dw4bejakaXaB/NXFMl2dvbg83WZipjzs4uldox9qmp85lJfvaS30llMDIyxsGDR3D8+BFcvnwB6enpEAgEiIqKRFRUJM6fPwtdXV0MGvQdZsyY/cnXeHRwcJD6u3v3HpU+A0Kd335y8jvMmzeb+X7s7eugffsOqFvXAWZmZtDR0WE6s8+cOcWkOlJ1D6hs7UVl7ty5hc2bNzB/t2zZCi4ubrCzs4OhoZHU7N9Nm9YhOztb4fqgX9L95pfKsZk1Gjp
Zoaga1hV1q6iuWJF43bixE4yNjZGXl4fY2Bjk5ubA1NRMap0icdoySS4urrhz55ZU6jPptY1kGyWPHz/CrAOkpaUFV1d3tG7dGrVq2cLAwEBqgKO4g0WduqU61zVJ6tSZJOOZpmOVZEOdiYlJmZenqMrYVpaMLOpS935L+jvJ/yw6BjVZjywoKGQeV9V9Kvny6dY3h46DGYRF1bBjUFcbLIrBACgGf07y8so/sF1Vu6aqz74iMaqk5MM1pLCw6ttRiKwqTSWqpcWCiaHyFFxEs0aNGovExATcvXsbUVERmD17Onbu3FtpqQpv3/ZhOgXd3dti8+ZtMDQ0lLuvOtPLVWGxWBgwYBAGDBiElJRkhIaG4OXLFwgKCkRsbAyEQiH8/f0QEhKM/fsPo1EjR6nXa2vXwJgx4zFmzHjEx8fhxYsQvHjxAkFBAXj3Lgl8fil8fK4jODgAhw8fV3s2mSLffPMhZWdwcFCZF403NPzQaSQeeaGM5IXYwODD9yD5nahzHHV8it+anV1trFjhialTfwQgWg8sOTkZdnaidF4GBh8+n7Fjv8fcufM1du6PGRh8CFpNmjjhyJGT5T6W9PfxZd/AfI60WFow1tH86GciX2VeCwwNDbF+/f8wduxwcDgcbN26Ce7ubeTO1Dl79jTTKdinTz94eHgqnB2tbIa75HVFnVlHPJ5mrqliBQUF8PBYKjVC8969O7h166bK9CdfCh6vUOUMBcnPXvI7qSz6+vqYOnUGpkyZjjdvovHiRSjCwkIREPAMGRkZKCoqwrlzZxAcHISDB49obJCNKtHRUdi160+p5/bs+Qvt2nVA3bqqZ15Wpn/+OcxUwidMmIRZs+YonNV68+b1Si/Pnj1/ARCtxbJp01Z07txV4b4bN65Veqwv7X7zS6WlxYI+1RU/mYrEay0tLbRu7QI/vwcQCoUIDAxEjx7fMjMP6tZ1kNvJLW6UjIyMQH4+F4aGRlINlJLLHgCitImHDx8AILr279y5V2FqL03VaRRR5/iS9wCajlVhYS+Yx6rScSnyuca28lD3fkv6O5HfLqEORSnoy0Pyt9GtWw9s2rS1Asf68B1VxX0q+XqwtFhg6Vc8kxRRD8XgsqnqGPypSV7bt2zZrrQeo/lzf/js7tx5qLBNXxXJSQ8Unz4fVZ9QnXxSovWJNqB3774ARGuZzZ49DVlZWZVyvmfPnjCPf/11odILSHJyssJt5WFra4e+fftj4cIlOHnyLE6dOsfk0S4oyGcaiBRxcKiHQYOGYPnylfj338s4dOgos3Zieno6jhw5XOEySo5QLC4uZlJRqktyhkhiouoFksUz6wBINTIZGhrCyEjUCPvuXVKZZsop8ql+ay1atJIKVJmZH1IESE6Jl0zPWRmMjIyZcrx/X7FObsk88mVJeUjI56iyrwWWlpaYPHkaAFEFYe/eXXL3E8cjNlsbCxYsVpoyWVk8srL6cF1JSlJ93VXn2lwW//vfBua8HTt2ZmY5bdq0nkkV86VT5zOT/Ow/ZacJi8WCo2NjDB8+Ep6e63Dlyk388cdupjM6JuYNzp8/90nKUlhYCA+PJcw6Qz169AQg6jxeuXKpRmJ5RTx7Jlro3tzcAjNmzFaa6rayf7vJye+Y30yXLt2UVqbz87kqU+5I+hLuNwlRR0XjteTMgqCg5//9X/7aRmLOzqIZDHw+n1mnSDyIp25dB5nr+8uXL5gBB0OGDFe63k9lX1fUuQeozFglOaBC0eerrs8ptpUXh8NBbm6u0n1Ey1KIfhd6enoy6ynq6Hy4NywtVR5Dy1pvV0byt1HROmtV36cSQsqHYnDZVHUM/tQkU3CnpVVu26bsuTXTrmpkZMQMMEpOfid3vXlJ6nzHpOKoY7Aa0tbWxqpVa9Gv3wAAQGxsDGbNmiqVj15TJNfAU5Uf+MkTf42fX1L9+g2wadMWJr1WaGhwmV7fvHkLrFr1YQR5WV8vz8cVirKOwmzevCXzWNwAp8zTpx/2+TiIt27
tAkA0CkhT38Wn+K2xWCyw2Wzmb8lRKI6OTZgOz8DA50zjaWURp2fIzs5iFtUtj0aNHJlyh4W9QHZ2drmOI51KTnnQJaQyVfa1YOjQEahZUzRQ4sYNb8TEvJHZJzMzE4BoQIay9RqKioqkUqh8rFEjR6Yz7sWLEJXXlYCAZyrLr67r16/ixg1vAKL1V9ev34zZs+cCEHVmrFixFKWlX37Kn2fPnirdnp6ezgyasLGxKVcaVck0faoqJcqwWCy0a9ce8+f/xjynifsDdfz++/8QHx8HQLRu3YYN/0O3bj0AABERr7F7906lr5dOVaj5GCH+N2dnZycVpz+WkZGO6OhojZ9fuixluR99XKGZIJ/j/SYh6qpIvJacWRAUFCi1tpG4s/xjDRs2YtaFCwoKlFrbSF4KM/F1BVD9b/nx40cqy1wRb9/Gqlyz6NmzD/cAyhpQy8rX9x6ioiIAADo6OujXb6DGjg2oH9s+1DU+j3qG5KBkeV69esmsddW0aTOZtNvGxh/uD1VlMwoLC1VZng8DYpR/PmZm5mjQoCEAIDIyQup3XlbNmjVnHgcEPFeyp3gfzd2nEkIqhmKw+qoyBmtKWdrrJDt3K/uzVXZuf//yn5vFYqFpU1GMKigowKtXL5Xu//w5xadPgToGqyk2m40VK1Zj4MDBAIC4uLeYNWtahWc6fUwyJ7Cy3v5bt24iNjZGo+eWx9TUjJm1qGjdGGXEKSrL+/qPSV5Uraysy7y4e6tWrZjG8MDAAKUX1levXjLr91ha1kSrVs5S2/v3/1ChPHBgr8ZmGpT1t5aXl1emcwcFBTI5r3V19WBv/2GhaDabzaw5mJOTg1Onjpf3bailf/8Pa5vt3bu73I3NkuUuLi7GwYN7y3UcyY7myk7lQIgqlRl3dHR0MG7cRACiTp4DB2T/zYjjUXZ2ltIF0E+fPoXc3Byl5/rmG9Hi7FwuF1euXFK4b0zMG5WNVOpKSEjAli2bAIiudWvXboSenh5Gjx6LTp26ABBd5/fv36OR81Wl8+fPKE0d4uV1nOm46d69Z7nOITmIRBPXRzu72sxjyTSvleXWrZvMb8/BoR7mz18EAFi2bCVsbEQzPLy8TiitOGr6M5A9vujf3Lt3SUrj4aFDByr9M1P3frSkpAR//32wwuf73O43CSmL8sbrRo0cYWIiyoYSGxuDO3duM9tcXWUbGAFRI5Gzs2hwYmDgc6mBOfJeo+6/ZQ6Hg9Ony5/WXx0CgQBeXorrFjxeIc6fP8P8Xd549bHo6CisW+fJ/P3dd8MqbSaEqtgmvs4XFqpOCfYpnDp1Qmm8OXnyGPNY3vfRoEED5rGyQUrx8XFqNY6K46w6MVZcF+fz+ThwoPz3cra2dmjSxEmtcvr7P0Jc3Ntyn4sQonkUg9VTVTFYk8rSXufk1JTJKOLv74fQ0JDKLJqUXr36MAOjvbyOIyenfBMXAKBr1+7MY2Xtszwe77PPVPC1oI7BakxLSwvLl6/CkCHDAIjSTM6aNQVpaakaO4fkiLW9e3fJbdwIDHyOTZvWV/hcBw/uw5Mn/kpHefv43GA6kRwdm0ht27FjG168UD7y799/zzKPHR0bV6C0ohEukinvxCkDykJbuwbGjv0egKgxfMWKpXJT4CUnJ2PFiqVMRWns2PEyafS6d/+WqURERUVi2bJF4HIVLzL75Ik/ioqK1CpnWX5rL1++wLBhA3H8+BGVI6Oio6OwZs0KiffQQ2aB2h9+mMykidm3bze8vE4o/Y0UFhbi0qUL8PG5odZ7k9SjR09mJNKTJ/5YvXqF0kWO+Xw+Hj9+hMOHZRshJ0yYxHQU//vvWRw6tF9h42BxcbHcSp9kZT4yMqJM74WQylCZcWfYsOEwMzMDIB5JHym1vVmzZgBE18o9e+SnG/XxuYH9+3erPNe4cROYUeC7dv0pd9R4ZmYmPDyWaKRRv6SkBCtXLmWuJ3PnzmcqBgCwYoUn0yh4/PiRL370d2pqKtasWSV3Nubt2z7
w8hJVNHV19TBy5OhynUP6+vha6b4bNqxl1ktW5Px5yfuDJkr2rLjk5HfMfZOOjg7WrdvEVCxNTEzg6bkebDYbQqEQa9asUjjzQLLzqTJiRNOmon9zOTk5Uo2xkk6cOIYLFyq/0levXn0m3ffDh75y/83yeDx4enrgzRvlsxe/tPtNQsqjPPFaS0uLyZ4BiOIRANSpU1dpx5X4NdHRUVKDaeQ1SjZt2oyJv5cvX5DbMJmbm4vFi+ernEmgCV5eJ3H7to/M88XFxVizZhVSU0WfV/v2HWTWGi2r/Px8nDx5HFOmTGKuL82atcDs2XPKdTxNxDZxLM3NzUFqakq5yqFJ4eEvsWPHVrnX55Mnj+PuXVFDubm5BQYMkJ1l2bZte7DZ2gCAf/89g8TEBJl93r9/j6VLf1NrQIs4zsbHx6lcT2nEiFGwtRXtf/Hiefz11x9K05mWlJTg9m0fnDt3Wmbb+PETmccbNqyWWk5ELCEhHhs2rFb5Hgghnx7FYPV8yhhcGcrSXsdisTBrlijeC4VCLF48X60sOwcO7FUZ61Wxtq6FkSPHMMecO3c23r1LUvqa6OgobNq0Tub5AQMGMutm3rlzC2fPesnsU1JSgvXrVyMlRbPLjRH5tKu6AKRqsVgsLF68HNra2jh37gySkpIwc+ZU/PXXPqkGo/IaNGgIjhz5GwUF+Xj40BcTJoxBv34DYGNji7y8PDx9+hgPHtyHlpYW+vbtz6RIK4/AwAAcPLgP5uYWaN/+Gzg6NoGlpSW0tLSQkZGOp08fS104J036Ser19+/fhZfXCdjY2KJt2/Zo1MgR5ubm4PP5SE9/j4cPH+DFixAAoin+33//g9LyREZGyKxbwOcLkJ2dhRcvQnD//l2mY83evg5+/HFyud73mDHj4ef3EMHBgUhOfofvvx+FAQMGo1mz5mCxWAgPf4WrVy+joCAfgCjAizsTJWlpaWHDhv9hypQfkJ2djYcPfTFs2GD06tUbTk5NYWhoBA6Hg9jYGPj7P0RSUhJu3fKFrq6uWuUsy28tPT0df/31B3bv3onmzVugRYtWqFu3LkxMTMHn85GWlorg4CA8ffqYaXS3tq6Fn3+eK3Nea2trrFu3Gb/9Ng/FxcXYsWMb/v33LLp27Y769RtAX98ABQX5SEl5h9evXyMw8DmKioowbdqssn4VYLFY2LhxK6ZOnYS0tFTcuOENf38/9OjRE05OTWFiYoqioiImbdrz50+QnZ0Nd/e2+OmnKVLHsrGxxcqVq7Fs2WLw+aU4cGAvbt68jh49esLBoR5q1KiBrCxRylI/v4eoVasWOnToKHWMhg0bwcLCEllZmbhxwxumpmZo0aIl9PT0mH3EM58I+VQqK+7o6eljzJjx2Lt3F4RCIfbv34OtW3cw20eMGI0rVy6Dzy/FuXOnERkZgR49voWVlTWysrLw4MF9BAQ8g4GBATp37op79+4oPFfr1s4YOXIMzpw5hYKCfMyYMRV9+/aDi4sbdHR0EBUVicuXL4LDyUW3bj1w//7dcr8vANi9eyciIkSdV9269cCwYSOktpuamsHTcx3mzJkJgUAAT08PHD9+GmZmsovVBwQ8k0ktJU5HBohi4ceVvMGDh0hVXMTy8vJw4sRRqeckGwZTUt7JrPnYpIkTunf/VtnbRffu3+Lu3duIjo7CgAGDYG9fB1xuHh498sPDh77MfrNn/yK3XOpo06Ytzpw5BQBYv34NRo8eCzu72kxKFysra6YCefnyBVy+fAEODvXg7t4GDRo0gqmpKYqLi5Gamoq7d28xnUkmJiYYNmyk0nOHhoYoHXgjqXZte6mKbGlpKVasWMrMev3553kyHUfOzi746aepOHBgL7Kzs7BmzQrs2LFLZo2/Nm3aMY//+usPZGdnoW5dB2hra//3XkwrlHZn9OjxePpU1MCwc+cOBAYGoH37DrCwsEBaWiru3LmF8PBXqFmzJho2bMTsWxlq1KiBYcNG4vjxIygtLcXMmVMxYMA
gNGvWAvr6+nj7Nhbe3leRlpYKd/e2SEiIV7iGxud2v0lIZSlPvHZ1dYev7z0Aohn6gOIUZmLi7QKBgFlmoX79BrC0tJTZ18rKCj169MSdO7fA5XIxceJYDB48BI0aNQabzUZUVCS8va8iNzcHAwYMwrVrV8r79lVydXXHmzfR8PBYgps3r6Njx04wMjJGUlIirl27wnQqmZiYYvHiZSqPFx8fx3x2gOh6z+VykZGRjvDwVwgODmLqcwDg7t4Ga9ZslBkUqS5NxLY2bdoxcXnx4gUYOnQErK2tmXhjb18HderULVf5ysrKygq1atng9OlTCA4ORp8+/VCrVi1kZ2fh7t07CA4WzYRhsVhYutRDbqYeS8ua6N9/AK5cuQQul4vJkydi6NARaNTIESUlJQgPfwVv76soKuKhZ8/echukJbVp0w5v3kSjsLAQCxfOQ//+A2FmZg5xOG7Y0BHW1qK1m/T09LFly3bMnDkFeXl5OH78CG7e9Eb37j3h6OgIQ0Mj8Hg8pKWlIjIyAs+fP0N+PheDBg2ROW/v3n1x69ZNPHzoi4yMDEycOBYDB37HxPRXr8Jw9epl8Hg8dO3aXep3Rwj5PFAMVk7TMbgqlLW9rmPHzpg2bSb279+DnJwc/PLLTDg7u6B9+w6wtbWDtrY28vLykJAQj7CwULx8GQahUChV5yuvmTN/xps3UXj69AkiIyMwevQwdO7cFc7OLrC0rAmhUIicnGzExsYgMDAACQnxYLPZWLLEQ+o4RkbG+O23ZVi+fBGEQiG2bfsffH3vo3v3b2Fqaork5GRcu3YZ8fFxGmlHIapRxyABi8XCwoVLoK2tDS+vk0hOfodZs6Zg1679qF3bXvUBlLCwsMC6dZuwbNlv4PF4iI2Nwa5df0rto6enh8WLl4HPF1SoY1BcAcnOzsL169dw/fo1ufvp6+tjwYJF6NChk9zXp6am4PLlCwrPY2pqhlWr1qocwX32rJfc0Q8fc3V1h6fnOhgZGavcVx42m43ff/8Tnp4e8PW9h4KCAoXn7dq1O1avXqdwrZ/ate1x6NBRLF++GK9fh4PDyZUatf6xj9oYVVLnt2ZubgErKyukp6dDIBAgLOwFwsJeKD2uu3sbeHh4omZN+SOh2rVrj337DsPT0wPx8XFITExgRlDJw2az5d4IqcPa2hp//30ca9euwuPHj8DhcHDx4nkVr6kl9/muXbvj99//xJo1K5GZmYGEhHj8888hufva2NjKPKetrY0ZM2Zjw4Y1KC0tlWnAB4AnT4LUeFeEaFZlxZ2RI0fjxImjyMvLg5/fA7x+Hc7MWmrUyBFLlizDpk0bwOeXIiwsVGbWkKmpGdasWY8XL0KVdgwCwLx5C1BYWIgrVy6Czy/FtWtXZCo/o0ePRZcu3St0Q+vv/wheXicAiNbTW7Zspdz93NzaYOLEH/HPP4eQkZGBtWs9sW3bHzL7BQcHKbyOAICf3wP4+T2Qeq5Nm3YKOwaVHSs1NVVme//+g1R2DHp4rEJeHgcBAc9lOhYB0UCWqVNnYNSoMUqPo0yHDp3g4uKG4OBAJCUlYtu2/8mUc+VK6ZH08fFxzJp+8tjY2GDjxq1MI58i6sxKFRs9eix+/fXDGk/79u1m0oZ36tRF4Wfw449TEBDw7L9BNE9w4sRRmQ6mRo0cmUFZWVmZ2Llzh9R2Fxc37NlzQO2yfqxDh46YMmU6Dh7cB0CU9sbf309qHxsbW2zatBVnzqi+X6qo6dNn4c2baDx54o/S0lJcunQBly5J3++5uLhh/fpN+OGH8QqP87ndbxJSmcoar+WtSSS5No08jo6NYWRkBC73Q5pveccRW7x4OZKSEhEZGYGCggJmFrmkHj16YtGiZZXaKGljY4spU6Zj6dLf8PChr9TAFbGaNWti69Y/mJlgyty+7aOyowkQpY8eO3Y8vvtumMyAj/KoSGwbNOg7/PvvGcTHxyEyMkJmdsDkydMwdeqMCpdRHWy2NjZs2IIFC35BVFS
E1KAnMR0dHSxatAxdunRTeJy5c+cjNjYGr169BIfDwZEjh6W26+rqwcNjFfh8gcrva9y4Cbh58zqysjL/G5glndHBw8OTSRcIiOLy338fx6pVy/Hq1Uukp6czg5jkYbFYsLaWX/9du3Yjli1bBH9/P/B4PJw7d1pqdiGbzcacOb/C1NSUOgYJ+UxRDFZM0zG4KpSnve6nn6bCxsYWO3ZsBYfDQUhIMEJCFK9HbmBgCCOjsi1Zpais27b9gd27/8Lp06dQWlqKe/fuKG0zsbKSXyfu0eNbLF68HNu2bUZJSYnc+Oji4gYPj1XUMfgJUMcgYcybtxBstjZOnDiK1NRUZjRK3boVG+XXoUNHHDvmhRMnjuLZs6dIT38PXV1dWFlZo337Dhg6dATq1q2Lq1cvV+g8W7fuwPPnTxEcHIjIyAgkJSUiJycHQqEQxsbGcHCohzZt2mHw4KFyp9L/888JPH36GCEhwYiKisC7d++Ql8cBi8WCiYkp6tdvgG++6YhBg76DiYmJnBKopqWlBUNDQ9SqZYOmTZujV68+aNOmbYUrdfr6+ti8eRsCAp7B2/sqQkNDkJmZAQCwsLBE69bO6N9/oFojRezsauPw4WN48OA+7ty5jZcvXyArKxOlpaUwNjaBg4MDXFzc0Lt333J3Zir7rTk5NcXlyzcQHv4KQUEBePkyDAkJ8UhPf4/CwkJoa2vD0NAIderUQdOmzdC9e0+0bu2s8pxNmzbDqVPncP/+XTx4cB+vXr1EVlYWeLxC6Ovro1YtGzRs2Aiurm7o3LkrLC1rluu9AaIO8e3bd+Llyxe4ceM6XrwIQVpaGrjcPOjo6MDS0hIODvXRurULOnXqzCw2L0+7du3x77+XcfXqZfj5PUBMTDRycnLAYrFgYWGBhg0boW3b9ujdu5/c1w8ePAQ2Nra4cOEcXr9+hezsbLVTwBJS2TQddwwNjTBy5BgcPizqzNi/fw+2b9/JbB80aAgcHZvg1KnjCA4OQlZWJgwMDGFjY4NOnbpg6NARsLKyUpnmDxCneFmJHj2+xfnz5/DyZRi43DyYm1ugadNmGDp0ONq378Cs7VoemZkZWLt2JYRCIdhsNjw91yuNP1OmTEdgYADCwkLx6NFDnD59EqNHjyv3+auKoaER/vxzD65du4zr173x9m0suNw8WFhYwtXVDaNHj4WTU7MKnYPNZuOPP3bh7NnT8PW9h7i4t8jPz5ebGuzKlZt4+tQfoaEhePMmGsnJyeByuWCztWBmZo5GjRzRpUtX9O07QGqEp6Y9e/aUGdRiZWWFFSs8Fe6rpaWF1avX4/vvx4DDycXevbvg5taG6SgXW7lyDVxc3HDr1g3ExMQgL4+D0lLNrfc3Zcp0uLi44swZL4SFvQCHkwtjY2PY2dVG1649MHTocJnsCpWlRo0a+P33P3Ht2mV4e19FdHQUioqKYGZmjoYNG6F3777o27c/M2tUkS/hfpMQTVM3Xjds2AhmZmbMrANA8dpGYlpaWmjd2gWPHj1U6zUmJibYv/9vnD9/Frdu+SAu7i1KS0tgYWEBJ6dm6NdvgNQaNpXJ1dUNx46JBoP6+T1g0rzZ2dVGt249MGbMuHLXl/T09GBoaARjY2PUr98ATZo4wc3NHS1bttZI2TUR2/T19XHo0BGcOnUc/v6PkJiYgIKCAqWpliuTtbU1Dh48gosX/8WtWzeRmJiIwsICWFlZo23b9hg79nuV95hGRsbYs+cgLlw4Bx+fG4iLi0NpaQmsrKzQrt03GDVqLBwc6qnVdmFlZYUjR07i5MljeP78KZKTk1FYWKB0HUR7+zo4dOgonj59jDt3biMsLBQZGekoKCiArq4erKysUL9+A7i4uKFz5y4KMyfo6enh99//hI/PDVy5cglRUZEoLCyAhYUlnJ1dMGLEaLRo0bLCbTCEkMpHMVi+yozBn0p52uv69x+Irl27w9v7Cp48eYw3b0RthHx+KYyMjGBnZ48mTZzQpk1bdOjQsdy
ZBT6mrV0Dv/zyK0aOHI2rVy8hMDAAiYmJ4HByoaWlBRMTU9StWxfNm7dEu3bfSKW2/diQIcPQurUzTp06gWfPniArKxOGhoZwcKiHvn37Y9CgIQontBDNYgmV3ZWUUXR0NIqLS8BiacHWto6mDksIIYQQQgghhBBSZZKTkzFsmGhtOnkzywkhhBBSOSgGk+ouJSURQqEAOjo14OiomXUzlQ+JJYQQQgghhBBCCCGEEEIIIYR8FahjkBBCCCGEEEIIIYQQQgghhJBqgDoGCSGEEEIIIYQQQgghhBBCCKkGqGOQEEIIIYQQQgghhBBCCCGEkGqAOgYJIYQQQgghhBBCCCGEEEIIqQZYQqFQqKmDRUdHo7i4BCyWFmxt62jqsIQQQgghhBBCCCGEEEIIIYRUKykpiRAKBdDRqQFHR0eNHJNmDBJCCCGEEEIIIYQQQgghhBBSDVDHICGEEEIIIYQQQgghhBBCCCHVAHUMEkIIIYQQQgghhBBCCCGEEFINUMcgIYQQQgghhBBCCCGEEEIIIdUAdQwSQgghhBBCCCGEEEIIIYQQUg1QxyAhhBBCCCGEEEIIIYQQQggh1QB1DBJCCCGEEEIIIYQQQgghhBBSDVDHICGEEEIIIYQQQgghhBBCCCHVAHUMEkIIIYQQQgghhBBCCCGEEFINUMcgIYQQQgghhBBCCCGEEEIIIdUAdQwSQgghhBBCCCGEEEIIIYQQUg1QxyAhhBBCCCGEEEIIIYQQQggh1QB1DBJCCCGEEEIIIYQQQgghhBBSDVDHICGEEEIIIYQQQgghhBBCCCHVAHUMEkIIIYQQQgghhBBCCCGEEFINUMdgNRAYGID27V3Rvr0rDhzYW9XFqVLqfBZDhgxA+/auGDJkwCcu3edj5sypzOdUUfT7I6T6oX/3H3zNcUf8vmbOnFrhYx04sJc5XmBggAZKRwghRBWK11+vr+G7/ZrvoT4XV69eZj7jq1cvV/r5NNnOQMiX7mu4ThNSUariwqeOU9WNdlUXgHwZrl69jHXrPOVu09LSgoGBAWrWtELjxk7o3r0HOnfuCm1t+nmpwuMVIjo6Cq9fv0ZExGtERIQjPj4OfD4fAODh4YmBAwerdSyhUIjExAS8fh3+37FeIzIyAgUF+QAAFxc37NlzoNLeS0VcvXoZKSnJAICpU2dUcWkIIZ8DijuV6+XLF7h27QrCwl4gNTUFhYWF0NPTR61atdC8eQv06dMP7u5tq7qYUsQVZltbO7VjIyGEkMoREhKMGTMmAwCcnJrin39OKN3//fs0DB7cj/l70qTJmDFjttLXXLp0ARs3rgUA9OrVB2vXbqxgqT8vBw7sxaFD++VuMzAwgKGhIYyMjFG/fgM0aeIEFxc3tGrV+hOXUjEvrxPIy8uDsbExxowZX9XF+ex8bfctCQkJ8Pa+gtDQYMTFxYHLzQMAGBkZwcbGFvXq1YeTUzO4u7dBw4aNqri0hHzdKAZXnGQMnjx5GrVFEuTl5cHLS/RvqXHjJujatXsVl6jyUQsaqTCBQAAulwsul4u4uLfw8bmOxo2dsGHDZtjb16nq4n3WBg/uDw4nVyPH+vPP7Th16rhGjvWpXbt2BcHBgQCoY5AQohrFnfIrKirCxo1rceOGt8y2/HwuYmO5iI2NwZUrl9ChQyesWbMeRkbGVVBSWeKKm4uL21fRwEYIIV+y5s1bQE9PDzweD9HRUeBy85TGi8DA51J/BwUFqjyH5AxyNzf38hf2C1RQUICCggKkp6fj7dtY3L17GwDg4FAPY8aMw5Ahw8Fisaq0jF5eJ5GamgIbG1vqGJTja7lvKS0txa5df+LMmVPMAGZJ2dnZyM7OxuvX4bh+/RoA4LfflmL48JGfuqiEVBsUgwnRvLy8PCZ29+8/iDoGCZHHzc0do0aNZf4WCATIzc1FWFgobt/2QVFREaKiIvDLL7Nw/PhpGBgYVGFpy+7ixWuf7FwCgfSNtY2NDUpKSpGZmVHhYxkYGKJWrVp
4+za2QmWsKDc3dzx5ElSlZSCEfNko7miOp6cH7t27AwBgs9no1q0HWrVqDQsLS2RlZeLFi1Dcu3cHAoEA/v5+WLhwHnbvPgAtrcrNPj916gwaGEIIIV+QGjVqoFUrZzx79gR8Ph/BwUHo3Lmrwv0/boQMD38JHk80W10R8cBBAHB1bVPxQn/GevbsjV69+jB/l5SUgMvNQ0ZGBsLDXyEkJBgFBfmIj4/D5s0bcPu2D9as2QhLS0u5x/sa6mBfw3v4GqxZsxI+PjcAACwWC23atIO7exvUqmUDbW1tcDi5iI2NxYsXIYiMjAAg2zZBCNEsisGkuhg4cPAXPbjmc0cdg6TMatWyldtrPmTIMHz//Q+YNWsqsrOzkZz8DmfPeuGHH36qglJ+Gbp06Ya6dR3g5NQUTk5NYWZmjjVrVsHb+0qZj1W/fgOMGTMeTZs2g5NTU9St64CgoEDMnj2tEkpOCCGfDsUdzQgJCWY6BQ0MDLF79344OTWV2mf06HF4/Tocs2dPQ0FBAUJCguHn9xBduiiuaBJCCKmeXF3d8OzZEwCiRkd1GiU7dOgEf38/lJaWIjQ0FO3atZe7f0JCAtLT3wMArKysUbduXQ2X/vPi4FBP6cj0/HwuLl48j/3796KoiIfAwAAsXDgXe/YcUNqwS0hFPHzoy3QKmpiYYMuWHWjd2lnh/u/fp+Hq1SuwtLT6RCUkpPqiGEwIqajKHf5Nqp369Rtg0qQpzN8PHtyvusJ8AVauXINJkyajffsOMDMzr9CxhgwZjnnzFqBPn35wcKhX5allCCHkU6C4o74nT/yZx0OHDpfpFBRr2rQZhg4dzvwdEkKj9QkhhMiSTC0mmXLsY2lpqXj3LgkA0L//QNjY2AIAgoKeK3xNUBClMJNkaGiE8eMn4uDBf2BiYgIAeP06HDt37qjagpGvmmTq+RkzflbaKQgA1ta18NNPU9Cjx7eVXDJCCMVgQkhF0YxBwsjKysL8+XMQEfEaAPDdd0OxaNEysNnsMh3H1dWNeZyYmKBwP6FQiBcvQvH06WOEhb1AXNxb5ObmgMViwcTEFE2aOKFr127o23cAatSooda5nzzxx4UL/+LlyzDk5XFgbm4BJ6emGDZsBNq1+0atYwwZMoBZK+Hj9G4nThzDzp3bAQAuLq746699Cj+fhIR4TJo0HgUFBdDT08Pffx9H/foN1CrD50YgEMDb+wquX7+Gt2/fgsvNg4WFBVxd22DUqNFwcmqm8LWBgQHMrMWPF/SdOXOqVHoCAGjf3lXmGP37D8LKlaulnouOjsKlS+cREhKMlJQU8Hg8GBkZwczMDFZWVmjevAW6d++psOGbEFL1KO6IfKq4k52dxTyuU0f5qE/J7YWFBWq9Dw6Hg3//PYN79+4iNTUZxcXFsLGxRceOnTBu3ARYWtZU+FrJxd937dovVQH9OC4EBwfKjRUeHp4yaUYePXqIGze88fr1K2RkZIDP58PExBRmZmaws6sNZ2cX9OrVB9bWtdR6j4QQUh0pitdNmzaHgYEBCgoK8OZNFPLy8mBsLLvGkWQDo6urO1xd3eDtfVXpGkeS2z5ulMzJyYaf3wMEBQUiOjoKKSnJKCzkwdDQALVq2cDFxQ1Dhw5XWfeSrIs8eRIEgUCAGze84eNzHTExMcjKygKfX8qks0xOTsawYQMBfKifvH//HufOnYaf3wOkpqYCAOzt7dG1a3eMGTMOhoZGSstQVo6OjeHh4YlFi+YDAC5fvoiJE39ErVo2Uvspq4NJKk+dSnzfIpaamiI3Ln98XvE+Li5u2LPnAPLy8nDhwr/w9b2L5OR3yMnJgbOzK/bsOVCm9yCpqKgIFy/+i1u3biIpKRGFhYWwsrJC27bfYMyY8UpnvUhm7jl//irs7OwU7nv16mWsW+cJQPb+oyL3LQDA5/Nx69ZNPHhwH+Hhr5CTkw0WiwUrK2s4O7ti6NDhaNpUcf1b0q1bN3H16iVERUUiPz8flpY14ezsguHDR6F
Fi5YqXx8X95Z57OIi+x404f79u7h8+SKioyORk5MDU1NTtGjRCqNHj4WLi5vC18n7DqKiInH2rBcCAwOQmZkBExNTNGvWHBMn/ojmzVtIvd7f/xEuXDiH6OhIZGZmwszMHG3btsPkydNgZ1e7Ut4rIeVBMbjqYvDHZRQKhbh+/Rq8va8gNjYW+fn5sLW1Rdeu3TBu3ASYmpoxr83P5+LSpYu4desG3r17h+LiItSpUxd9+/bHqFFjFbY3yHufmZkZOHPGCw8f+iI1NRU1atRAvXr1MHLkGHz7bS+piSExMW9w+vQphIQEIS0tDbq6umjRoiXGj5+odidvZmYGLl48j6dPnyApKREcTi4MDY1Qt25dfPNNJ4wYMYoZpKQMh8PBqVPH8eDBfSQnvwObzYatrR26deuBESNGSX1eiiiLt2IVbeeR/MzFvL2vyM3m93FbhVhRURG8va/Az+8BoqOjkZOTjRo1dFCrVi24ubXByJGjUbeug8r3+6lRxyABACQlJWLevNlIShKNIpk8eSqmTp1ZrmPp6Ogwj4uKihTut379aly9elnutvT090hPfw8/vwc4fvwotmzZofQmXiAQYNOm9bh8+YLU82lpqUhLS4Wv7z2MHj0WXbpUbOHQceO+x/PnT/HkiT+Cg4Pw998HMWXKdJn9SkpK4OGxFAUFosbU+fMXfbGdgvn5XCxaNF9mBFJqaiq8va/g5k1vTJ06A5MmTf5kZTp8+AAOHtwHgUAg9Xxubg5yc3MQHx+HgIDnePjwAU6ePPvJykUIUR/FHfVoMu6Ym1swjxMT45WeNyHhQwdr/foNVZYzJuYNFiyYK9VQCIgalOLi3uLy5YtYu3Yj2rfvoPJYmsDj8eDhsQR+fg9ktmVmZiAzMwMxMW/w8KEvUlKSsXDhkk9SLkII+dKoitetWjnjyRN/CAQCBAcHokuXbjLHEDcw1qtXHxYWFnBxcYW391WEh4ejsLAQ+vqyqTAlBw+6uX1Y2+jduySMGjUMfH6pzGs4HA44HA6io6Nw9qwXpk6dgZ9+mqrW++RwOFi8eD6Cg9WfJR8YGIBlyxYhNzdH6vmoqEhERUXi4sV/sXXrH2jSxEntY6qjS5ducHJqioiI1ygpKcH169fKVReryjpVZGQEFi+ezzTkasL792lYsGAuoqOjpJ5PSkpCUtJZXLt2GYsWLcOAAYM0dk5Ni4l5g2XLFiE+Pk5mW0JCPBIS4nH58gWMGDEav/66UOFgMR6Ph+XLF+HRIz+p51NSkpGSkgwfnxuYNWuOykZZPv/DWoFZWVkabdMoKirC6tUrcPfubannMzIycP/+Xdy/fxc//zwX33//g1rHO3fuDHbs2IrS0g/XhvT09/D1fY+HD32xYoUn+vUbiNLSEmzevBFXrlyUen16+ntcu3YF9+/fw86de9CsWfMKv0dCKopisGKfOgYXFBRg6dKFePr0idTz4vru7du3sHv3ftSqZYOEhHgsWDBXZtBydHQUoqOj8OiRH7Zv3wldXV2V5w0NDcGSJQulBvmKnw8NDUFAwDMsXrwcLBYLFy/+iy1bNkt9P0VFPPj7+8Hf3w+LFi3DsGEjlJ7v9OlT2LNnJ3g8ntTzubk5CAvLQVjYC3h5HceaNRuU1u3Dw19h4cJ5yMrKlPsZXL58EVu37lD5/tWhyXae8ggKCsSqVcuZ9LtixcXFiI3lIjY2BufPn8W0aTM/u2VvqrRjUCgQQFjErcoiVAmWrhFYWp9PFtfXr8Mxf/4vyM7OApvNxsKFizF0qPILhTKxsTHM449HLkri8XioUaMGWrd2RvPmLWBvXweGhkYoLi5GUlIi7t+/izdvohEfH4f583/GkSMnFY722LFjG9M4y2az0adPP7i6uqFGDR1ERUXiypVLOH36FNLS0sr9vgDRYtsrV67BhAljkJmZgb//Pgh39zZwdpYePbdz5w5ERYkW3u7ZszcGDx5SofNWpbVrPREYGAB7+zro338Q6tSpAy43D35
+D/Ho0UPw+Xzs3bsLBgaGGDVqTJmOPX36LOTm5mDfvt3M72bz5m0y+0n+jh488MX+/XsAALq6uujUqQtat3aBubk5BAIBMjIyEBUVgWfPnlbgXZOvmVAgAJ9b/WIP2+jziT0Ud9SnybjTtWt3/PPPIQDAhQvn0atXX7mzql+/DsfFi/8CACwsLNGv3wClZRQPIElNTYGLiyu6d+8JCwsLpKWl4ubN64iKikReXh4WL16APXsOlrmhRRwXFi9eAABo0KAhpk+fJbOfZKVv795dTKegubk5vv22Nxo0aAhTU1MUFxchOTkZ4eEvlabdIYRULYFAgGJeflUX45PT0TOE1hcUr11d3ZlU1UFBAUobJcWzf8T/5/NLERoaLNOwJLm2kY2NrdTsnZKSEvD5pbCxsYG7e1s0bNgIFhaWqFGjBrKzs/HqVRju3LmNoiIe9u/fAxMTU4wYMUrle121ajmCg4PQoEFD9OrVB/b2dZCfny+T3UQsLS0FS5f+Bg4nFx07dkanTp1hZGSMxMREeHtfQVJSItLT0zFnzkwcOXICtraKZ5+VR58+/ZjZI0FBgWXuGKxInWrpUg/weDxs2rQO2dnZMDc3x5IlHjL7OTjUk3tuDicXixbNR1paKtq2bY9OnbrAwsICmZmZMg2f6uLzS7Fs2SJER0ehceMm6NOnH2rVskFWVhbu3buN4OAgFBUVYf361TA2Npb7O9WU8ty3AKLO0pkzp6KgQHTdc3Z2QYcOnWFrawuBQIA3b6Jx7doVZGVl4ty50ygtLZH7uQPAypXLmE5BXV09DBo0mJkx9/JlGK5evYKdO3coXdcSAOzt6zCzBs+cOQVXVzeNLVmyfv0a3L17Gw0bNkKvXn1Qu7Y9eLxC+Pk9hK/vPQDArl1/okWLVnB2dlF6LH9/P9y7dwempqYYNGgIGjVyRGlpKfz9/XDnzi0IBAKsW7cGLVq0wpkzXrhy5SIaNGiIvn37w9bWDhwOB97eV/Dq1Uvk53OxcuUynDp1Tu0MIl8jgUCgdMDl10pXV5diMMVgudavX42nT5+gefMW6NmzN6ysrJGRkY6LF88jLu4t3r1LgqfnCvzvf79jzpyZeP8+DT169ETbtu1hZGSEt29jcPbsaXA4HAQFBeDIkcOYNk35wOi0tBQsXjwfXC4XAwYMgouLG3R1dREe/grnz59DUREPFy+eR4sWrWBoaIhNm9bDzMwMAwd+B0fHxuDz+Xj06CHu3LkFAPj99//B1dUN9erVl3u+vXt3Me0F+vr66N69J1q2bAlTUzNwOLl4/vwZ7t+/Cw6HgwUL5mHXrr0y7RIAkJz8DnPnzkJeXh4A0f3AgAGDYGtrh9zcHNy/fxcBAc+xePECGBlVPLNCRdt5LCzMsXnzNmRnZ2HTpvUARDNlR40aK3Ouhg2lB037+z/CokW/orS0FFpaWmjfvgPatGkLKytrFBcX4/XrcFy/fhVcLhd79vwFAJ9V52CVdQwWv3mGgodHISzkVFURqgxL3wQGnSdCp1Hbqi4Knjzxx7Jli1BQUABdXV2sXbuxQjfJxcXFOHbsCPO3shz0w4ePxKJFyxROP548eRqOHfsHu3fvRFJSEk6f9sJPP02R2S80NARnz3oBEF24tm//S+rGsU+ffhg7djxmz56O+/fvlvOdfWBhYYFVq9Zi7txZ4PP5WLXKA8eOeTHv4+FDX5w5cwoAYGdXG0uWLK/wOavS/ft30bVrd6xZs0FqNMvQoSPg43MDq1evAJ/Px65df6JTp85lSrsh/p68vE4yz6mqnFy6JGqsZrO1sW/fYYWpQvl8Pl6+DFO7LKR64Dx7hpTjR8HnVL/YwzYxge33E2HStmpjD8WdstNU3GnatBnGjBkPL68TKCjIx+TJE9G9+7do2bI1LCwskJWVhRcvQnDv3h0IBALY29tj48atclPSSIqKigQAzJ79CyZMmCS1bcyY8di+fSvOnTuNoqIirF27CidOnClThfvjuGBqaqY0VvD
5fFy9egmAKJXM4cPHFX7n+flcZgQuIeTzER8RhIDbZ8EryKvqonxyegbGcO85Eg5OlZO2T13qxmtVaxxJrm0kTv1tb18HVlbWSE9/j8DAAJlGSWVrG5mbW2Dv3kMKOwqGDx+JKVNmYN682UhIiMfevX+hf/+BMDAwUPp+Hz9+hBEjRuHXX3+TmoU1ZMgwufsHBgaAzWZjzZoN6N27r9S28eMnYOXKZfD1vQcOJxebN6/Hjh27lJ6/rFq2bMU8fv36VZlfX5E6lThV+vbtWwGIOp5U1eEkxcS8AZvNhqfnOvTt27/MZZcnPT0d6enpcmfSjRo1BseO/YNdu/6EQCDAxo3r4ObmrvE0r2JlvW8BAB6v8L9/b/nQ09PD2rUb0blzV6l9+vTphx9++BGLFy9AYGAALl48jx49eqFt23ZS+/n43GDW3La0rIldu/ZJNQL36zcQI0aMxuzZ05kOOEX69OnHDLTy9b2HadN+xLBhI9CmTTvUrGml9LWq+Phcx9ix32POnHlS94WDBg3B4cMHsX//bgiFQhw/fkRlx+Ddu7fRpIkT/vhjF8zMzJnnBwwYhAYNGuLAgb3g80vh4bEEUVGRcpcv+O67oZg3bzYCAp4jKSkRDx7cx7ff9qrQe/xSvX0bg8eP/cDjFVZ1UT45PT19fPNNJ7UyplQmisEin1MMvnPnlty01t99NwxTpvyAmJg3CA4OxJw5M5CTk40dO/6SWVqkZ88+mDRpPIqKinDu3Gn8+OMUpQMQAgMDYGJiioMH/5FawqlXrz7o1KkLfv55OoRCIQ4d2o+CggI0a9Yc27f/BVNTU2bf/v0Hol69ejh06ABKS0tx9qwXfvttqcy5Hj9+hCNHDgMAWrRoiQ0btsDa2lpqnyFDhiM0NAS//joHBQX5WL16Jc6evQhtbenupc2b1zOdgt9+2wuenuuk3ueIEaNx6tRx/PHH7wrfe1lUtJ1HT08fXbt2R3JyMvNcrVq2KmN3RkY6PD09UFpaCnNzC2zZ8jtatGgltU///gMxYcIk/Prrz4iJeYP9+/ega9fuCjtnP7UqGwZRcP9wtewUBABhIQcF9w9XdTFw/fpVLFw4DwUFBTAxMcEff+wuV+OsQCBATk42HjzwxYwZUxAe/hIAoK2tjfHjJyp8nYuLm9KcxCwWCxMn/sg08srL7QsAJ08eg1AoBCBqlJQXoGrWtMK6dZvKvG6VIm3btmMaP9PSUrF+vWj9u/fv32PdOtFjNlsba9ZsgJGR8gbVz12tWjbw9Fwnd4p77959MXKkaJZgUREPZ854VXp5xA24TZo0Ubp+IJvNVrk4Oql+kv8+XC07BQGAz+Eg+e+qjT0Ud8pPU3Fn3rwFWLhwMSwsLMHn83H7tg+2b9+CFSuWYvv2Lbhz5xZMTc2wZMlyHD3qBUfHxmqVr1u3HjKdgqIysTF//m/MWjhv38bCz+9hGd552eXkZIP736zgrl17KP3ODQ2NNJ7ijRBScU9vnqqWnYIAwCvIw9Obp6q0DGWJ105OTZkOljdvopGbmyu1XXptow/rhYnXK5Pc/uE18lOYAYCpqanKTgI7Ozv89psoRTSXy2U6SZRp0sQJ8+cvKlPcHjVqrEyDJCCaceLpuY7JYvDkyWOZ9JYVZWNjyzzOy8tDaWlJmV5f1XWqESNGaaxTUKxp02aYP/83ud/hhAmTmAa+7OwsXL0q/x6vqly6dJFpvF+8eLlMp6CYkZExNmz4H/Nv7uTJYzL7HD9+lHm8bNkKuY2P9es3wPLlK1WWq2fP3uje/Vvm77CwF1i9eiUGDuyDQYP6YOHCeTh8+CCCggKl0o6qw8XFDb/88qvcwWI//PAjrKxEDdLPnz+VSg8qT40aNbBx4xapTkGx77//AQYGhgBEszIbNGiI335bKvM70dbWxpQpHxr8xbOwqqNHj3yrZacgIOqkf/TIt0rLQDFYPZ86Brdt217uWrf6+vpS9eCIiNeYPHm6TKcgILr29u7dD4Aofeq
rV6onMyxYsEiqU1DMzc0d7u6iQd+idR4LsH79ZqlOQbEJE35kOmefPHks9zx79+6CUCiEubk5tm37U6ZTUKx1a2f88suvzHnv3bsjtT06OopJt2pjY4MVK1bL7fwcO/Z79OjRU9HbLhNNtfOU1fHjR8HhiP7Nbdz4P5lOQTFra2usX78ZbDYbfD4fp09X7X2+pM9jfjT55I4e/Qdr1qxCaWkprK1rYd++wyov7mLe3lfQvr0r81+HDu7o2/dbLFr0K9M4q6Ojgw0btmikB7x1a1G5kpISZXJHFxcXw99flCLDyMgIgwcPVXgcR8fGaNu2fYXLIzZt2kxmwW5f33s4c8YLnp4eTBmnT5+p1oLen7vhw0fJzTkuNm7cBOZm/uOAUBn09PQAiCqz4hEohJDPH8WditNU3Bk0aAjmzJkHc3PZxhNA1GB27NgReHtfVbtsytZ/0dLSwrhxE5i/7927rXBfTRDHCUDUAEQIIUR9ZY3XbDYbzs7OAAChUCiT9ks8g8HBoR4sLWsyz4sbJSMiIpCfL50yNjhYfkNmWYhjOQC1soiMGDGqTLPZP45tH9PX18fw4SOZvzVdTzI2lm4Ay80t28C3qq5TyUvPVVGS9VJ5Jkz4cK9S2fciZSVupLSyskafPv2U7mtqaoaOHTsBEK0DVlxczGxLTk5mUss7ONRDx46dFR6nY8fOKu+bWSwW1q3bhFmz5sg0uqanp8PP7wH279+NWbOmYtCgvjh4cB+z3rUqY8eOV5iWlM1mMzOVioqKmE5TZe9FUeYiXV1dqc7voUOHy8xuEWvRoiWz7e3bWJXvgRBNoxisnqqIwSNHjla4TXIADZvNxrBhwxXuK/l9qrrOmJtboGfP3mqdt1OnLgpTpurp6TGdi8nJ72TSBL95E83UmwcPHiK3c1FSnz79wGaLrpVPn0p3NEpmTBo2bKRU3fxjygZ1VwZl7TxlJRQKcf36NQCiLA7yUqpKqlevPrOkysefWVWqslSiBt1+qvapRKuCUCjE779vYVKONWjQEDt2/AVr61oaO0eXLt2wcOFitY5ZWlqK+/fv4sGD+4iKikRGRjoKCgpkFkAXe//+vdQC2dHRUSgpEY2ObNXKGTo6OkrP5+7eFo8fP1L/zSihra2NNWs2YuLEMeByufj99/8x29q0aSd35sSXqK2KtIPW1taoV68+YmNjkJaWiszMDKkbDk1r1+4bREZGgMPJxYwZk/H99z+gU6cuKlPdEQIAdj/+VO1TiX5qFHc+r7gTGRmBxYsXIDU1BfXrN8D8+Yvg6uoOU1MT5OaK1js4fPgA3r6NxdatmxAVFYmlSz2UridjaGjErFujSJs2H2JJeHjZU56VhaGhEVq0aImXL8MQEPAMCxfOw8iRo+Hq6l6t14oh5EvSrs/Yap9K9FOrSLx2dXVn1jMLCgpEt249mG3imQcfNy5Kr3EUgg4dOgIAEhLikZ6eDgCoXdteamacpMTEBFy/fg3BwUFISIgHl8tFURFP7r7itZKUad26bKlb69dvACsr5akU27RpB2AnADADmTRFnLlArKzrvlVlncrKygq1a9tr/Lgfp9T8WPPmLWFgYIiCgnxERLyGQCD4LNYSy8/nMrNZatasiYcPVc9WEncGFhUVITn5HdPBJ/k7c3dvI/e1ktzd2zJrCCrCZrMxceKPGDVqDB4+fICnTx8jLOwFEhMTpO6fs7IycfDgPty6dRO///6nyu9Y0cwKMclrT16e8rqbqoFxlpaWzONmzRTfs2pra8PU1AyZmRnVehByx45dq30q0U+NYvDnH4OVXbMk20Dr1nVQmsFH8nqk6jrTtGkzpbMopa9tzZUeS7yvUCgEl5snlRUuJCSIecznC1SmmQYAAwN95OXlyXRuStb1xTMaFWnWrDkTlyuqou08ZfX2bSzTuWhsbKLWZya+5xB3zsrLzPepVVnHoE6jtqjRwB3CIm5VFaHKsHSNwKqiG9AzZ04x6bVatXLG1q07lE63lef
jBTg5nFzExcXh2rXLyMnJwaNHD9G6tbPKnv/4+DgsWbKwTCOx8vOlfy8ZGenMY3v7OipfX6eO6n3Kws7ODkuWeMDDYwnznLm5BTw912psUe6qVqdOXbX2iY2NASAaOViZHYMTJ07Co0cPERPzBjExb7B69QpoaWmhUSNHtGjRCi4urujQoWOlrRlBvmwmbdvC2N0dfG71iz1so6qJPRR3Pp+4ExPzBtOn/wQej4dmzVpg9+590NP7MCPc0tISvXr1QefOXTBr1jSEh7/C5csX0KxZMwwZonjUY+3a9irPbWZmDmNjY+Tl5TGVzcq0cOES/PzzdHC5XPj5PYCf3wPo6uqhWbNmaNmyNdzc2sDNzV3hiHFCSNVycHJFncbOKOZVvKHgS6OjZ1glnRUVidfSaxw9Zx6npqYgOfkdAFHDpaR69erDwsISWVmZCAx8zjRKSqc9k36N2IEDe/HPP4fB5ytPLyj2cSyXR1HKLEXKeg+g6dj3cWNiWe+tqrJOZWWlucFhYiYmJiob+FgsFuzt7REVFQkej4e8vDyVsyI+hbS0NKbh8vXrcCxevKBMr+dIDHiszPtUPT199OrVB7169QEAFBQUIDLyNYKDg+Djc4PpYIyPj8PChfNw7JiX0vssMzMzpeeTHMxVVFSsZE+o/B5r1PgwkE/1vjX+O2eR0v2+ZvXrN4SDQ/1q+Rno6upSDP4PxWBpyq4dkoOFy3I9UvVvTLPXNsnzSl9TU1I+rK13/PgRpcf5GOejQfdliUOScbkiNNHOU1aSn5m/vx+TVUpdHA5HZef2p1ClrSEsLS2w9Mt2A0sqRjLvO4/Hg1Aov+dcGUULcP7001QsWPALQkKCsXPnDpiZmWPAgEFyj8Hl5uHnn6czF2crKyt06NCJCU46OjpMML516yZu3/b5r/zS5ZVMU6FsevKHfRSnxCwve3t7sNnaTFB0dnap1I6xT02dz0zys1c3dUh5GRkZ4+DBIzh+/AguX76A9PR0CAQCREVFIioqEufPn4Wuri4GDfoOM2bM/uLXeCSax9LSgnYZG09I+VHc+Xzizu7df4LHE43knDdvgcKy6enpY+7cBZg+/ScAgJfXSaUdg/r6qj8H8XHz8vJQWFi5cQIQrbdx7JgXDh3ajzt3bqGwsBBFRTwEBwchODgIR4/+DQsLS0ycOAmjRo39LGYMEEKkaWlpQc+A7uM+lYrE68aNnZjBH7GxMcjNzYGpqZnUOkXitGWSXFxccefOLanUZ9JrG8k2Sh4/fgSHDu0HIPqNuLq6o3Xr1qhVyxYGBgZSnQniDpaPY7k86sR0ScqWWvhwzA/7aLqOJNkgZWJiUuaBLlVZp6qMEfLq3m9Jfyf5n0XHYEVnpokzWQBAQcGHGV6VfZ9qYGAAFxc3uLi44ccfp+Do0b+xZ89fAEQzKW7f9lG6jqQm771YLPWPVZZ9qzMtLS21rnNEMygGf/4xWN1rliavMZ/q2paXV/5OMskYBECqrv8p2ks01c5TVpqM3VWJhklXM6NGjUViYgLu3r2NqKgIzJ49HTt37lW4zlBZGBoaYv36/2Hs2OHgcDjYunUT3N3bMAu+Sjp79jTzj7ZPn37w8PBUmN7rxYsQhecUL54KgGnsVEbTqRAKCgrg4bFUaqTMvXt3cOvWTWYk3ZeOxytUOVJU8rOX/E4qi76+PqZOnYEpU6bjzZtovHgRirCwUAQEPENGRgaKiopw7twZBAcH4eDBI3RDS0gVorjzecSd4uJiPHv2FIDoPbRsqTx9U6tWraGvr4/CwkLExb1Ffn4+DA0N5e5bWKj6cwA+fBb6+pUfJwDA1tYOHh6eWLRoGV69CkNYWBhCQ4MRHByIgoICZGVlYseObYiOjsKKFas/SZkIIeRzVZF4raWlhdatXeDn9wBCoRCBgYHo0eNbZuZB3boOqFlTdlS0uFEyMjIC+flcGBoaSTVQurlJp0IsKirC4cMHAIhi2c6dexWmsi4srNwUeOocX/IeQNN
1pLCwF8xjZakRlfma6lTq3m9Jfyfy72vUoSg1WXlI/ja6deuBTZu2VuBYH76jT3mfymKx8MMPPyEoKABPnz4BADx79lRpxyAh5AOKwWVT1TH4ayMZO7Zs2Y7OnbuW+1iSdX0ej6dy6ZWKxiFNtfOUleRvauzY7zF37nyNHftToqEy1YxofaIN6N27LwDRAqOzZ09DVlaWRo5vaWmJyZOnARBdqPfu3SV3v2fPRDeLbLY2FixYrHTNn+TkZIXbrKw+TDdPSkpUWb7ERNX7lMX//reBOW/Hjp2ZC96mTeuZKftfOnU+M8nP/lNOhWaxWHB0bIzhw0fC03Mdrly5iT/+2M10CsTEvMH58+c+WXkIIbIo7nwecSc3N5cZlWZoaKgy9SeLxYKR0YdBIcpm+b17lySz1pHs+XOYUXWfOmWGjo4OXFzcMHHiJGzb9geuX7+DJUuWM7Mrrl27goiI8E9aJkII+dxUNF5LziwICnr+3//lr20k5uwsmsHA5/OZdYrEjTt16zrIxIuXL18wo/6HDBmudH3byq6LqXMPUJl1pJs3rzOPFX2+6voa6lQcDge5ublK9xEKhXj3TvS70NPTk1lPUUfnw71haanykfw5OTnlK6gckr+N9+/TKnisqr1PFa3pJZKZWfmp4wn5WlAMLpuqjsFfG8kU32lpny4OScbl8tJUO09ZSaa/rWjsrkrUMVgNaWtrY9WqtejXbwAAIDY2BrNmTZXKA1wRQ4eOQM2aopRmN254Iybmjcw+mZmZAEQ5kJXlzS4qKpKayv6xRo0cmUbRFy9CmEW4FQkIeKay/Oq6fv0qbtzwBiBaGHj9+s2YPXsuAFGu4hUrlqK0VL2c258z8QwTRdLT05k8zjY2NuVKo6ql9aGBWlXjsjIsFgvt2rXH/Pm/Mc+FhgaX+3iEEM2guKMZFYk7kiPacnJyVK5nwOPxkJ2dzfxtYqI41VZ+PhevXilf0F0yliirRCrzoTOz/HECEKUwGzJkOIYPH8k8FxJCsYIQQioSryVnFgQFBUqtbeTiIr9RsmHDRsy6cEFBgVJrG8lLYSaO5YDqdWseP36ksswV8fZtrMo1i549+3APUN7YJ4+v7z1ERUUAEA1+6ddvoMaODahfp/qQVq1icVlTxI2Dirx69ZJZU6hp02YyaeGMjT/cH6anv1d6rLCwUJXlUfe+xczMHA0aNAQAREZGSP3Oy6pZs+bM44CA50r2FO+juftUAFIpbT9VhghCvhYUg9VXlTH4ayTZeVzR764scSg8/FWF1/rTVDsPIN02rSp2Ozo2YQZSBwY+V9ku9LmijsFqis1mY8WK1Rg4cDAAIC7uLWbNmob375XfAKtDR0cH48ZNBCDq5DlwYK/MPuIcwtnZWUovAqdPn0Jubo7Sc33zjWiRXC6XiytXLincNybmjcrKgroSEhKwZcsmAICurh7Wrt0IPT09jB49Fp06dQEgqnjs379HI+erSufPn1E6tdvL6ziTSqV7957lOodkpUETKQfs7Gozj9VdEJkQUrko7lRMReOOoaEhbGxsAYjy2d+9e0vp+W7f9mE6GR0dG6tMAXLy5DGF2wQCAU6dOsH83b37t0qPpYg4VmgqNY10rOAr2ZMQQqqP8sbrRo0cmUEksbExuHPnNrPN1VW2gREQdZw4O7sAEDWqSDbYyHuN5Do0ykahczgcnD59Uml5K0ogEMDL67jC7TxeIc6fP8P8Xd560seio6Owbp0n8/d33w2rtJkQqupU4tSi6qYUr2ynTp1QOshU8l5F3vfRoEED5rGywbHx8XHw91fdcFqW+5b+/UWdu3w+HwcOlL8NwdbWDk2aOKlVTn//R4iLe6v0eGXtpHz40Jd53LBhozK9lhBCMVhdVRWDv1ZOTk2Za7a/vx9CQ0PKfayuXbszj8+fP6t0QLKyNgR1aaqdByhb2zSbzUafPv0AiAZenzql+Pf4OaOOwWpMS0sLy5evwpAhwwAACQnxmDVrCtLSUit87GH
DhsPMzAyAeERjpNT2Zs2aARA14O7ZIz/tm4/PDezfv1vlucaNm8CMxtu160+5o/cyMzPh4bFEIw1/JSUlWLlyKTOFfu7c+VI3vStWeDKVs+PHj2h8FN6nlpqaijVrVskd/XD7tg+8vEQBX1dXDyNHji7XOSQrnZGRr5Xuu2HDWkRHRynd5/z5s8xjR8cm5SoTIUTzKO6Uj6biTp8+fZnH27dvU5g+89Wrl/jjj23M3+LGKmXu3r2Nkydlb4YFAgH++GMbwsNFMwobNGiIjh07qzyePHZ2dgBEDV3K1s2JjIzAwYP7lI6uLSwshLf3VeZvihWEEPJBeeK1lpYWXFxcmb+PHz8CAKhTp67Sjivxa6Kjo6QG08hrlGzatBkTfy9fviC3YTI3NxeLF89XOZNAE7y8TuL2bR+Z54uLi7FmzSqkpoo+r/btO6BRI8cKnSs/Px8nTx7HlCmTmNTczZq1wOzZc8p1PE3UqcR1uNzcHKSmppSrHJoUHv4SO3Zslbv+38mTx3H3rqih3NzcAgMGyN7btG3bHmy2aMbbv/+eQWJigsw+79+/x9Klv6k1+FTd+xYAGDFiFGxtRftfvHgef/31h9J0piUlJbh92wfnzp2W2TZ+/ETm8YYNq5GQEC+zT0JCPDZsUL2+8pIlC7B48QI8e/ZU6T1tSUkJdu7cgcBA0YwjNlubSYlICCkbisHq+ZQx+GvHYrEwa5bofkIoFGLx4vlqZY87cGCvzL2Eo2NjtGvXHgCQmpqC9etXy41nZ8544c4d5YOV1aHJdh5TU1NmFmB0dKTKjHY//DCZSUu+b99ueHmdULoGcWFhIS5dugAfnxsqy/KpaKvehXzNWCwWFi8WrbNz7twZJCUlYebMqfjrr33MjWx56OnpY8yY8di7dxeEQiH279+DrVt3MNtHjBiNK1cug88vxblzpxEZGYEePb6FlZU1srKy8ODBfQQEPIOBgQE6d+6Ke/fuKDxX69bOGDlyDM6cOYWCgnzMmDEVffv2g4uLG3R0dBAVFYnLly+Cw8lFt249cP/+3XK/LwDYvXsnIiJEnVfduvXAsGEjpLabmprB03Md5syZCYFAAE9PDxw/fhpmZrKLBgcEPJOZWi1OCwMA9+/flQm2gwcPkepIE8vLy8OJE0elnpOsoKWkvJNZe6tJEyeVMze6d/8Wd+/eRnR0FAYMGAR7+zrgcvPw6JGf1IjA2bN/kVsudbRp0xZnzpwCAKxfvwajR4+FnV1tJr2LlZU1E8gvX76Ay5cvwMGhHtzd26BBg0YwNTVFcXExUlNTcffuLbx5Ew0AMDExwbBhI+WflBBSJSjulJ2m4s6ECZNw585tJCUlgsPJxZQpk9C9+7dwc3OHiYkpOJxcBAQ8x717d5kGr2bNWmD48FFKy9e4cRPk5+fjzz9/x8OHvujR41uYm1sgLS0NN29el0p35uHhKZO6S11t2rTDmzfRKCwsxMKF89C//0CYmZlDnKmrYUNHWFtbg8vl4uDBfTh8+ABatmyFli1bw8HBAYaGRsjLy0N8fBxu3brBVFhbtGgJd/c2Ss5MCCHVT3nitaurO3x97wEQjdwGFKcwExNvFwgEzLpt9es3gKWlpcy+VlZW6NGjJ+7cuQUul4uJE8di8OAhaNSoMdhsNqKiIuHtfRW5uTkYMGAQrl27Ut63r5KrqzvevImGh8cS3Lx5HR07doKRkTGSkhJx7doVplPJxMQUixcvU3m8+Pg45rMDgNLSUnC5XGRkpCM8/BWCg4NQUJDPbHd3b4M1azZKzeAoC03Uqdq0acfUBxcvXoChQ0fA2tqaaTi2t6+DOnXqlqt8ZWVlZYVatWxw+vQpBAcHo0+ffqhVqxays7Nw9+4dBAeLZsKwWCwsXeoBQ0MjmWNYWtZE//4DcOXKJXC5XEyePBFDh45Ao0aOKCkpQXj4K3h7X0VREQ89e/aW2yAtSd37FkB0H7tly3bMnDkFeXl5OH78CG7e9Eb37j3h6OgIQ0M
j8Hg8pKWlIjIyAs+fP0N+PheDBg2ROW/v3n1x69ZNPHzoi4yMDEycOBYDB37HpNJ79SoMV69eBo/HQ9eu3aV+dx8TCATw9b0HX997sLCwhKurG5ycmsLS0hK6uvrgcvMQExON+/fvSXVaTJ48DQ4O9ZR+PoQQxSgGK6fpGEyAjh07Y9q0mdi/fw9ycnLwyy8z4ezsgvbtO8DW1g7a2trIy8tDQkI8wsJC8fJlGIRCodTasmKLFy/HDz+MQ15eHnx8biAyMgIDBgyGnZ0dcnNzce/eHQQEPIOdXW0YGRnJDOouC0228wCAu3vb/9rik7B8+WJ069YDRkbGTOxu1qwFTE1Fs3Otra2xbt1m/PbbPBQXF2PHjm3499+z6Nq1O+rXbwB9fQMUFOQjJeUdXr9+jcDA5ygqKsK0abPK/X41jToGCVgsFhYuXAJtbW14eZ1EcvI7zJo1Bbt27Uft2vblPu7IkaNx4sRR5OXlwc/vAV6/DkfTpqKe/EaNHLFkyTJs2rQBfH4pwsJCZWZcmJqaYc2a9XjxIlTlP9x58xagsLAQV65cBJ9fimvXrsgEodGjx6JLl+4VaqD1938ELy9ROjQbGxssW7ZS7n5ubm0wceKP+OefQ8jIyMDatZ7Ytu0Pmf2Cg4Pwzz+HFJ7Pz+8B/PweSD3Xpk07hR2Dyo6Vmpoqs71//0EqOwY9PFYhL4+DgIDnMh2LgGhk0tSpMzBq1Bilx1GmQ4dOcHFxQ3BwIJKSErFt2/9kyrlypfSIxvj4OMTHxyk8po2NDTZu3Cq1ICwh5PNAcUd9mow7RkbG+OuvvVixYhnCwkJRWlqKW7du4tatm3KP2aFDJ6xatUZlGlFDQyOsWrUWCxfOQ3BwINP4Jn1uI6xbt0lqzYGyGjduAm7evI6srMz/BtZIz4z08PDEwIGDmQZJgUCA0NAQpalQXFxcsWHDlnJ3VhJCyNesrPFa3ppEkuvWyOPo2BhGRkbgcj+kf5J3HLHFi5cjKSkRkZERKCgoYLKXSOrRoycWLVpWqY2SNja2mDJlOpYu/Q0PH/pKDZgUq1mzJrZu/YOZCabM7ds+KjuaAMDBoR7Gjh2P774bJrGGXflVpE41aNB3+PffM4iPj0NkZAQ2bVontX3y5GmYOnVGhcuoDjZbGxs2bMGCBb8gKipCarCtmI6ODhYtWoYuXbopPM7cufMRGxuDV69egsPh4MiRw1LbdXX14OGxCny+QOX3pe59i1ijRo74++/jWLVqOV69eon09HRm8Kw8LBYL1tbyZwKtXbsRy5Ytgr+/H3g8Hs6dOy01u5DNZmPOnF9hamqqtGOwQYNGiIh4DT6fj6ysTJW/U0NDI8ycORsjRpQvkxAh5AOKwYppOgYTkZ9+mgobG1vs2LEVHA4HISHBCAmRXWNYzMDAkJlhJ8nOrjZ27NiFhQvnITs7C/Hxcdi9+0+pfWrVssHmzdvw++9bKlRmTbfzTJ48DU+e+IPH4+Hu3dtMtgGxXbv2S/0badeuPfbtOwxPTw/Ex8chMTGBmbErD5vNltvxXlWoY5Aw5s1bCDZbGydOHEVqaiozGqVu3fKN8jM0NMLIkWNw+PABAMD+/XuwfftOZvugQUPg6NgEp04dR3BwELKyMmFgYAgbGxt06tQFQ4eOgJWVFV68UL2wt2iq/Ur06PEtzp8/h5cvw8Dl5sHc3AJNmzbD0KHD0b59Bya1RXlkZmZg7dqVEAqFYLPZ8PRcr3Rh0ylTpiMwMABhYaF49OghTp8+idGjx5X7/FXF0NAIf/65B9euXcb16954+zYWXG4eM2Jw9OixcHJqVqFzsNls/PHHLpw9exq+vvcQF/cW+fn5clO0XLlyE0+f+iM0NARv3kQjOTkZXC4XbLYWzMzM0aiRI7p06Yq+fQdAT0+vQuUihFQuijvKVUbcsbGxxb59h/DkiT9u3/bBq1cvkZGRAR6vEHp6+rC2tkaLFi3Rt29/qUXsVWnYsBGOHj2Fs2dP4/79u0h
JeYfi4hLY2NigY8fOGDfue9SsWbE1kKysrHDkyEmcPHkMz58/RXJyMgoLC2RSfLi6uuHEiTN49uwJXr4MQ2xsDN6/fw8erxA6OjqwsrKGk1Mz9OrVG507d61QmQghpDpQN143bNgIZmZmzKwDQPHaRmJaWlpo3doFjx49VOs1JiYm2L//b5w/fxa3bvkgLu4tSktLYGFhASenZujXb4DU+jaVydXVDceOeeHsWS/4+T1gZkzZ2dVGt249MGbMOBgZGZfr2Hp6ejA0NIKxsTHq12+AJk2c4ObmjpYtW2uk7JqoU+nr6+PQoSM4deo4/P0fITExAQUFBUrTaFUma2trHDx4BBcv/otbt24iMTERhYUFsLKyRtu27TF27Pcq7zGNjIyxZ89BXLhwDj4+NxAXF4fS0hJYWVmhXbtvMGrUWDg41MPVq5dVlkfd+xZJ9vZ1cOjQUTx9+hh37txGWFgoMjLSUVBQAF1dPVhZWaF+/QZwcXFD585dFGbs0dPTw++//wkfnxu4cuUSoqIiUVhYAAsLSzg7u2DEiNFo0aKlyvexfPlK/PzzL3j27ClCQ4Px5k003r1LAoeTh9LSEujr68PCwhINGzZC27bt0KNHL2YmBSFEMygGy1eZMbg6699/ILp27Q5v7yt48uQx3ryJRk5ODvj8UhgZGcHOzh5NmjihTZu26NCho8LMBc2bt8Dp0+dx8uQxPHhwH8nJ78Bma8PW1hbduvXAyJGjYWpqppEya7Kdx9GxMY4ePcUcKy0tFTweT2nsbtq0GU6dOof79+/iwYP7ePXqJbKyssDjFUJfXx+1atmgYcNGcHV1Q+fOXWFpWVMj71sTWEJVCVPLIDo6GsXFJWCxtGBrW0dThyWEEEIIIYQQQgipMsnJyRg2TLQ2nbyMJoQQQgipHBSDSXWXkpIIoVAAHZ0acHTUzLqZlDeJEEIIIYQQQgghhBBCCCGEkGqAOgYJIYQQQgghhBBCCCGEEEIIqQaoY5AQQgghhBBCCCGEEEIIIYSQaoA6BgkhhBBCCCGEEEIIIYQQQgipBqhjkBBCCCGEEEIIIYQQQgghhJBqgCUUCoWaOlh0dDSKi0vAYmnB1raOpg5LCCGEEEIIIYQQQgghhBBCSLWSkpIIoVAAHZ0acHR01MgxacYgIYQQQgghhBBCCCGEEEIIIdUAdQwSQgghhBBCCCGEEEIIIYQQUg1QxyAhhBBCCCGEEEIIIYQQQggh1QB1DBJCCCGEEEIIIYQQQgghhBBSDVDHICGEEEIIIYQQQgghhBBCCCHVgEY7BtlsNgBAIBBAKBRq8tCEEEIIIYQQQgghhBBCCCGEVAtCoRACgQDAh/43TdBox6COjg5YLFFhi4p4mjw0IYQQQgghhBBCCCGEEEIIIdVCUREPQqEQLJao/01TNNoxaGJiAhaLBRYL4HI5NGuQEEIIIYQQQgghhBBCCCGEkDIQCoXgcjlgsQAWiwUTExONHVujHYNGRkZgsVjQ0mKBxytEZuZ78HiF1EFICCGEEEIIIYQQQgghhBBCiBJCoVCqf01LiwUWiwUjIyONnYMl1HCvXV5eHt69eweBQACBQAihEP91Fmq0D5IQQgghhBBCCCGEEEIIIYSQr4ZAIGDSh2ppifrWateuDWNjY42dQ+Mdg8CHzkGhUPjff5o+AyGEEEIIIYQQQgghhBBCCCFfF3H6UBaLpfFOQaCSOgYBUa8ml8sFh8NBcXEx+Hx+ZZyGEEIIIYQQQgghhBBCCCGEkC8em82Gjo4OTExMYGRkVCnZOCutY5AQQgghhBBCCCGEEEIIIYQQ8vmghf8IIYQQQgghhBBCCCGEEEIIqQaoY5AQQgghhBBCCCGEEEIIIYSQaoA6BgkhhBBCCCGEEEIIIYQQQgipBqhjkBBCCCGEEEIIIYQQQgghhJBqgDoGCSGEEEIIIYQQQgghhBBCCKkGqGOQEEIIIYQQQgghhBBCCCGEkGqAOgYJIYQQQgghhBBCCCGEEEI
IqQaoY5AQQgghhBBCCCGEEEIIIYSQaoA6BgkhhBBCCCGEEEIIIYQQQgipBqhjkBBCCCGEEEIIIYQQQgghhJBqgDoGCSGEEEIIIYQQQgghhBBCCKkGqGOQEEIIIYQQQgghhBBCCCGEkGqAOgYJIYQQQgghhBBCCCGEEEIIqQaoY5AQQgghhBBCCCGEEEIIIYSQaoA6BgkhhBBCCCGEEEIIIYQQQgipBqhjkBBCCCGEEEIIIYQQQgghhJBqgDoGCSGEEEIIIYQQQgghhBBCCKkG/g/SrNF14XOphQAAAABJRU5ErkJggg==",
+      "text/plain": [
+       "<Figure size 1280x960 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "generate_col_plot(df, 256, 32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "8401ef27-8733-4ab3-a561-e915a783a196",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAABwYAAAQVCAYAAAC4+q7tAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAB7CAAAewgFu0HU+AAEAAElEQVR4nOzdd3xc1Z3///edpmrLXbbk3otkywU3DBhj00PLkkA2CQlkSSGQZAlkUyGQQALhS035PTYhlCVkCYQlVGODjW1sDLYlW5Z7t1xly+ptyv39Ic1o7hQVW9JVeT0fD9Dcc8+99zPSaKyZ95xzDNM0TQEAAAAAAAAAAADo1hx2FwAAAAAAAAAAAACg/REMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQAxAMAgAAAAAAAAAAAD0AwSAAAAAAAAAAAADQA7jsLgAAAADo6iZMmBC6vXPnThsrAQAAAAAAiI9gEAAAAO2ivLxcs2fPViAQ0M0336z777/fsv/ee+/VG2+8ocTERG3cuFEuF3+adlYnTpxQfn6+8vPztXXrVm3dulUlJSWh/R988IGGDh16Vuc+evSoXnvtNa1YsUJHjx5VVVWVBg0apMmTJ+tzn/ucFi9eLMMwzvk+fPe739WyZctC27Nnz9aLL77Y4uN37typ119/XZs2bdLBgwdVUVEhj8ejvn37asKECVq4cKGuuuoqpaamNnmewsJCXXLJJa2qffjw4ZbaI33lK1/Rp59+2qpzBmVmZurDDz9sUd9AIKDVq1dr6dKl2rJli4qKilRdXa0BAwZo0KBBmjZtmubMmaO5c+cqOTn5rOppLzfccIMKCgo0ZswYvfPOO5Z9r776qn76059Kkv72t79p5syZdpSIdlBaWqq1a9dq/fr12rFjhw4ePKjy8nJ5PB7169dP2dnZuvjii3XFFVfI7Xa36Jxn8/v20ksvadasWa065rPPPtN7772nzz77TEVFRaqoqFC/fv00cOBAZWVlac6cOZo/f77S0tJadV4AAACAd18AAADQLjZv3qxAICBJmj59etT+TZs2SZKys7MJBTuxBQsWqKioqF3O/Y9//EMPPfSQqqqqLO2HDx/W4cOHtXTpUs2fP1+PPPKIBg4ceNbXWbp0aZPBWlNqa2v1wAMP6NVXX43a5/P5VFVVpSNHjujDDz/UU089pQceeKDVwZ+dBgwY0KJ+27dv189//nPl5+dH7Tty5IiOHDmi3NxcPffcc3ryySd1+eWXt3WpZ626ujo0krep5yK3263s7OwOrQ3to7KyUnfffbfWrFkjr9cbtd/r9aqyslKHDx/WO++8oyeeeEK//e1vdd5559lQrdXhw4d1//33a82aNVH7jh8/ruPHjys/P18vv/yy7r33Xt122202VAkAAICujHdgAAAA0C5yc3NDt2fMmGHZV1RUpMOHD0uK/UY9Oo/2CgVfffVV/exnPwtt9+7dW3PnzlVqaqp2794dCqDWrl2r2267TS+//LJSUlJafZ2ysjI9+OCDZ1WjaZq64447tHr16lBbamqqsrKylJGRoYqKCu3bt0979uyRJJ06dUp33nmnnnzySS1ZsqTZ86ekpOi6665rtl+/fv2a3L948WKNGzeu2fNI9d+PN998M7R9zTXXNHvMxx9/rG9/+9uqra2VJDmdTmVlZWnYsGFKSUlRWVmZ9u/fr927d8vv97eojo60ZcsW+Xw+SdHPRVJjMDh
lyhR5PJ4OrQ3to6qqSitWrLC0DRgwQFlZWRowYIB8Pp+2b98eCoyPHDmir33ta3rmmWd08cUXt/g6ixcvVnp6erP9WtJHqg/gv/a1r4VGZBuGoYkTJ2rUqFHq1auXKisrdfDgQe3YsSNm4AkAAAC0BMEgAAAA2kVeXp4kaeDAgRo2bJhlX/CNeIlgsCtISEjQpEmTlJ2dHXpj/VxGqezbt88yteznPvc5PfDAA5bpJ9etW6fvfe97Ki0t1c6dO/WrX/1KDz/8cKuv9dvf/lZFRUVyu9265JJL9N5777X42Ndff90SCn7lK1/RnXfeGTV13+rVq/Wzn/1Mx48fl9/v13333acLL7xQCQkJTZ6/T58++sUvftG6OxTDLbfc0uK+L730UigYdLvduuqqq5rsv2XLFksoePPNN+vOO+9U//79o/qWlZXpgw8+UGZmZiuqb3/B5yIp+vmmuLhY+/fvj7kPXV9aWpquvfZaff7zn9fEiROj9m/YsEE/+tGPVFhYKJ/Ppx/+8IdaunRpi0fSfvWrX9WcOXPapNbCwkJLKHjZZZfpnnvuifr3U6oPPlevXt3s1MUAAABALASDAAAAaHOBQECbN2+W1PQIHYk34zu7119/XePHj7dM91pYWHhO53zyySdDo11mzJihRx55RA6Hw9Jn3rx5evTRR3X77bdLkt544w194xvf0JgxY1p8nXXr1oWmAL3ttttaPWXt66+/Hrp9ySWXWEY4hrvgggv09NNP68Ybb5QknT59Wh9//LEWLVrUqut1hPD7tHDhQvXt2zdu37q6Ov3kJz8JhYI///nP9eUvfzlu/969e+v6669vu2LbSHD0ct++fTV69OiY+ySei7oTt9utO+64Q7feemuT4dmsWbP0/PPP69prr1VFRYUqKir0/PPP6+677+7Aauv94he/CIWCt956q370ox/F7ZucnKzLLrusgyoDAABAd+NovgsAAADQOnv27FF5ebmkptf0GjlyZJPBBOw3efLkNl0D8tSpU3r//fdD2/fcc09UKBh00UUXaf78+ZIkv9+vl19+ucXXqampCY3GGzFihL7zne+0utbgNIOSdPXVVzfZd+rUqRo5cmRo+8CBA62+Xnvbu3evZY3A5kK8l19+Wbt375ZUH4w2FQp2ZsERgzk5OVH7+JBC99SnTx/dddddLRpRN3ToUN10002h7ZUrV7ZjZbEtX75cH3/8saT6KW1/+MMfdngNAAAA6DkYMQgAAIA2Fz51X+SIwZqaGm3fvj3mvpaqqKjQ66+/ro8//li7du3SmTNn5PV61adPH40ZM0bnnXeeLrvssmbXXSsuLtarr76qVatW6cCBAyopKVFKSoqGDBmiefPm6fOf/7zGjh17VjXGu15weso9e/aotLRUgUBASUlJSk9P15gxYzR9+nQtXrw45vRx3cGHH36oQCAgqT4Ybu4xcP3112vt2rWSpA8++CDuqL1ITz31lA4dOiRJuv/++5ud1jOWqqqq0O3evXs3279Pnz6h28H72JmEjxbs16+fLrzwwib7//3vfw/d/uY3v9ludbWnAwcO6MyZM5KaHr2cmZmpQYMGteicX/nKV/Tpp59Kkl544QXNmTNHJ0+e1D//+U8tX75cx44dU3FxsVJSUrRhw4ao448cOaJXX31VH3/8sQoLC1VWVqbevXtr6NChWrBggW688UYNGTIk7vVvu+02rVmzRpL03//933F/jk8//bSeeeaZ0Paf/vSnuOvn/eEPf9CTTz4pSfr2t7+t73//+1F9jh07ptdee03r1q3T/v37VVZWJql+rcz09HSNGzdOM2fO1JIlSzRw4MC49XdG4Y+NI0eOdPj1wz/08I1vfENOp7PDawAAAEDPQTAIAACAs7ZmzZrQKIdw69evD91+8803Leu6FRcXh6aRPHz4sH77299ajj3//PO1YMGCuNd8+eWX9fjjj6u0tDRqX1FRkYqKivTJJ5/o6aefbvJN81dffVW/+c1vQiM
bg0pKSlRSUqLt27fr+eef15e//GX96Ec/Ouc3apcvX66f/OQnMesuLy9XeXm59uzZo6VLl+qvf/2rVq1adU7X66zCHxstWZsrvM/Ro0d18OBBjRgxosljtm7dqueee06SdM0114RGHbbWkCFDQuHinj17mnxc+ny+0Fp1kmKuZ2anQCAQWltQqh8B6Xa74/bPz8/Xvn37JEmDBw/WtGnT2r3Gc3Ho0KGYI0qDPz+pfr3EyOebrVu3SpIMw4jaN2zYMH3pS19q9tpN/W5H+uMf/6g//vGPoelZg06fPq3Tp09r8+bN+vOf/6zvfve7oWl0I82ZMycUDH766adxn+PCf9eCfeMFg839Xv7v//6vHnroIdXU1ETtCz5n7ty5U2+99ZbefPPNVo3u7QwMwwjd7uhQ//Tp06EPP7hcrk45BTEAAAC6F4JBAAAAnLXc3Fw9++yzTfZ58cUX4+777LPP9Nlnn1nakpOT4wYwv/rVryznczqdys7O1ogRI5SQkKDi4mJt3749NOKjrq4u5nn+8pe/6JFHHgltezwezZ49W0OGDFFZWZnWr1+vkpIS+f1+Pf/88zp27Jieeuopy5vHrZGfn6/vfe978vl8kqTExERNmzZNmZmZ8ng8qqio0OHDh7Vr1y5VV1ef1TW6ir1794ZuT548udn+6enpGjBggE6dOhU6vqlg0Ofz6Wc/+5n8fr/69OmjH//4x2dd66JFi0IB43PPPadrrrlG/fr1i9n3T3/6UygYGjVqlObNm9fs+X0+nz7++GNt3bpVZ86cUUJCgvr27ausrCxNnTpVHo/nrGuPtG7dOh0/fjy0fcMNNzTZP3yKzeDP6cyZM3rllVe0dOlSHT58WF6vV/3791dOTo6uuuoqWwONY8eONftctGzZsrj7CgsLo46fPXt2s8Fgbm6unnnmmdCI5fPOO099+/bV6dOnQyOjgx544AG99NJLoe3k5GTNmTNHAwcOVFFRkdavX6+qqirV1tbqscce06lTp/STn/wk6pqzZ88O3Y4M/4Jqa2tD67w217euri40ytvtdkdNqbp8+fLQtLySlJqaqpycHA0ePFhOp1MVFRU6cOCAdu3aFfrQR1eza9eu0O3Bgwe3+Lh9+/Zpz549On78uHw+n9LS0jRy5EjNmjVLAwYMaNE5cnNzQ2HkmDFjlJiYqOrqav3jH//QO++8owMHDqiyslL9+/dXVlaWlixZoquvvppRhQAAADhrBIMAAADoEl5++WVLKHjFFVfoRz/6Ucwp93bt2qVXXnlFiYmJUfs2bdqkxx57LLR94YUX6uGHH7a8iVtXV6cnnnhCf/nLXyRJ77//vp577jl9/etfP6va//SnP4VCwcsuu0wPPvig0tLSovrV1tbqk08+0QcffBD3XA888MBZ1RBPcC2ujhI+qi4jI6NFxwwZMiQUDO7bt6/JAOrZZ58NBTL33HNP3CCvJb75zW/q7bffVlFRkY4dO6ZrrrlGt99+u84//3xlZGSooqJCe/fu1XPPPacVK1ZIqg8yn3jiiRa9aX/ixAndeuutMfelpaXp5ptv1u23366UlJSzvg9B4dOITpgwQZMmTWqyf/hahBkZGVq/fr3uvvtuFRUVWfoVFhaqsLBQb731lmbPnq0nn3zynL7nXc3TTz8tv9+v733ve/qP//gPyyjM8A8mvPPOO5ZQ8IYbbtBPf/pTyxp4FRUV+uUvf6l//etfkqTnn39es2bN0qWXXmq5ZlZWlpKTk1VVVaWCggJVVFREraWXm5sbun6/fv1CH5oITlsabsuWLaGRgFOnTo163gyfjvTLX/6yfvjDHyopKSnqe1FZWalVq1apoKAg5veqpKRETz31VMx9Z2vEiBG65ZZbzukcgUBAb7zxRmi7NSOM77///pjthmHo4osv1ve+971mRw9H/q7t3LlTd911V9Q6pceOHdOxY8e0bNky/fnPf9bvf/97DR8+vMW1AgAAAEEEgwAAADhrd955p+68805
L2z//+c/QKK0///nPuuCCC0L7vF6vzjvvPFVXV+vKK6/U448/3qLrlJaW6tFHHw1t33TTTfrlL38Zt//48ePjrkX3//7f/5Pf75ckTZ8+Xb///e+jRmZ5PB7de++9qqurC4WRzzzzjG688caoN+BbIrjOmMfj0cMPPxw36ElISNBFF12kiy66KO65wsOFtpCZmdlhwWBNTY1lKsKWjqgJ79fUdI0HDhwIhRizZ8/W5z//+bOstF6/fv30yiuv6Fvf+pZ27typoqIi/frXv47ZNykpSZdddpnuvvvuFq9V15TS0lL96U9/0tKlS/XHP/5Ro0aNOutzVVRUaPny5aHt66+/vtljwkcXFhYW6lvf+paqqqrkcDiUk5OjUaNGqba2Vps2bdLRo0cl1U9VefPNN+vVV19Vr169zrreszFnzhzt3LnT0lZYWKhLLrlEknT77bfr7rvvtuz/+te/rrVr12rgwIGhqTlby+fz6fvf/76+/e1vR+0LPq8EAgHLhxEuv/xyPfTQQ1EjkFNTU/XII4+osrIy9OGARx99VIsXL5bD4Qj1c7lcmjlzplavXi2/36+NGzdGPWcE10CU6tese+SRRxQIBPTZZ5+FvidBTU0jWllZGQrahwwZop/97GdxR06npKToiiuu0BVXXBFzf0VFRZs/f82ePfucg8G//e1voWlzHQ6Hbr755nOuyzRNffjhh1qzZo1+/vOf6wtf+ELcvuG/a+Xl5brttttCAfyUKVM0YcIE+f1+y/S+u3bt0he/+EW99tprLf6ABQAAABDkaL4LAAAA0HLBN5ndbrdmzpxp2Zefnx+aKjN8Orzm/O///q8qKysl1QdZP/3pT8+qtr1791qmLv3FL37R5HSN//mf/6m+fftKqn9T+6233jqr6wZrT0pKapPRX11VVVWVZTshIaFFx4WPYIo8R5Bpmvr5z3+u2tpaud1u/fKXvzzrqV/DZWRk6PXXX9djjz0WeizEMnPmTF111VUtCgVTUlJ0ww036PHHH9d7772n3Nxc5efn66OPPtKTTz5pGbG0f/9+feMb31BxcfFZ34elS5eGfu9cLpeuueaaZo8pKysL3V65cqWqqqo0cuRIvf7663r55Zf10EMP6bHHHtMHH3ygn/70p6Hg6sCBA3rwwQfPuta21FTg5fV6lZubK0k677zzzvoagwYN0n/8x3802WfNmjUqLCyUVP+82FS4ZhiG7rvvvtDIw0OHDsVcxzX8+fOTTz6J2h+875mZmbr++utD14s1nWh4W+TzckVFReh2nz592uR3qjPZvXu3JbT9t3/7N40bN67Z42bPnq0f//jHeuWVV/Tpp5+qoKBAn376qV566SV97WtfU3JysqT6UaO/+MUv9O6778Y9V/jv2oYNG1RUVKR+/frphRde0D//+U89/PDDeuSRR/Tuu+/q8ccfDz0fFhcX65577jnbuw4AAIAejBGDAAAAaFPB4C043V2sfVLrgsHVq1eHbt94441nvfZa+BvokyZNanaNu+TkZF199dWhUYPr16/XTTfd1OrrDh48WIcPH1ZpaaneeecdXXnlla0+R1DkqKiupLa21rLd0p9jeL/wEYfhgm/QS/VTgI4ePfosq7SqqqrSU089pb///e+qrq5WWlqaZsyYoUGDBqmyslLbtm3Tvn37tGbNGq1Zs0aLFi3SY489FvXYDxo0aJBWr14dMyAePHiwLr/8cl1++eX63//9X913330yTVOFhYV67LHH4o5WbM7//d//hW5fcMEF6t+/f7PHRK51mZKSomeffVaZmZmWdofDoa9+9auqra3V7373O0nSm2++qTvuuKPJtSA7QvDx4Ha7NWPGDMu+rVu3hu7juQSDl112mVyupl9Whz/vXHTRRRo4cGCT/dPT07VgwYLQ9LTr16+3jLyWrEFnZNhXU1OjLVu2SKp/nu3Xr5/GjRunXbt2RfWtq6sLrUXo8Xii1hfs27evEhISVFtbq927d2vjxo1RH/h
oqaFDh3aq56+ysjLdcccdoQ8bjBw5Uv/1X//V7HFPPfVUzA8JpKWladasWZo1a5a++MUv6vbbb9fhw4dlmqbuv/9+LViwIOZI2sjfNafTqf/v//v/NHXq1Ki+wX87fvCDH0iqDxI/+eQTzZ07t/k7DAAAADRgxCAAAADazOHDh3XkyBFJ0SN0pMY36gcMGKAxY8a0+LzBN7njnbelglPiSYp6Azye8EBh27ZtZ3Xd8Kn1/vM//1Pf/e539c477+j06dNndb6uKnKEYPgabE0J7xdr3cgTJ06EppodNWqUvvnNb55DlY2Ki4v1xS9+UX/9619VU1OjO+64Q6tWrdKf/vQnPfDAA3rsscf07rvv6tlnnw2FPR9++KG+//3vxz2nx+Np0ajRL37xi5b78frrr4fWWWyNwsJCSyB/3XXXtei4yJ/Vl7/85ahQMNzXv/710PcgEAho6dKlra61rQXvd3Z2dpMfUjiX55SsrKxm+7TH886UKVNCj6Pt27ervLw8tC98fcHgfQt+3blzp0pKSkJ9N2/e3OT6gh6PR4sXL5ZUP23qLbfconvvvVcffPCBZaRbV1NbW6vvfOc7OnjwoKT6aVyffPLJFv1uNjVyOGj06NH605/+FAqNS0pK9I9//CNm38jftSuuuCJmKBh05ZVXWh5377zzTrP1AAAAAOEIBgEAANBmwte1ihzB4Pf7Q1P3zZo1q8XnrKiosIwSGzZs2FnXFz4dY0vXZQoPQ86cOXNW1/32t7+tnJwcSfVTXi5btkw/+MEPNH/+fF122WX6yU9+ojfffNMybV93FBnORI4gjCf85x9rJN4DDzwQCkZ++ctfnvWI0kj33HOPdu3aJUm64447dNddd8UMJs8//3w9++yzoet+9NFHbfJm/e233x66nt/vjzmlZHPeeOMNmaYpqX4qyEWLFrXouMjvczAcisflcuniiy8ObW/atKmVlbat5j6kEAwG+/fv36oPKURqSUh0Ns87Q4cODd2O9bzjcrlC4WFw7cCg8Ofh4Mjs4PfANE1L36amEQ368Y9/rJEjR0qqn4L1jTfe0He+8x3NmTNHn/vc5/TLX/5Sy5cvb3HQb7fgupDB70NCQoL++Mc/auLEiW16nbFjx1pGh69atSpmv9b+rknSkiVLQreD/64CAAAALcVUogAAAGi1zZs364033ohq37hxY+j2m2++qWXLloW2KyoqQmvtHTt2TA888IDl2GnTpunaa6+NOmfwmKB4UzS2RPj6dC09T1JSUtxaWio5OVkvvviiXnzxRb300kuhwEKqX5PtwIEDeu2115SUlKQvfelLcQOori4xMVGJiYmhoK+lI+DC+6WlpVn2LV++XMuXL5ck3XDDDec0+ivc5s2btWbNGklSr169ml1Hbvz48br22mtDo4L++c9/ntOUsVL99J1Tp04NBT379u1r9TnCf0+vvPLKFoemffr0sWyPHTu22WPCA7aTJ0+2rMBzVFJSoqeeeiqqPbimn1S/tmnk882GDRsk1U8zGrmvT58+uuuuu1p0/Zb8nrbX886cOXNC0yyvX78+FPoGHy9Dhw4NfbDhvPPOk2EYMk1T69evDwVL4SFivN+dgQMH6rXXXtOf//xn/eMf/wj9PgYCAe3atUu7du3S3/72N6Wlpekb3/iGbrvtNjmdzhbdz44WCAT0X//1X/rwww8l1QesTz75ZKumtm6N+fPn61//+pek+jVuY+kqv2sAAADoPggGAQAA0Gp79+7VSy+91GSf1157Le6+zZs3h9a1CqqqqooZDEZO7VZVVdWi6d5iCX9TPvzN+qaEr/90tteV6qfku+2223Trrbdq586d+uyzz5Sbm6sNGzboxIkToWv95S9/0YYNG/TCCy/EDB0iQ4xz1ZoQpC2MGjUqNLXi0aNHW3TMsWPHQrcj1w4Mn6Zxy5Yt+sIXvhD3PMePHw/dLigosPS97777NGXKlNB2+LqWOTk5LQqA5s6dGwoGt27d2mz/lhg
0aFDodmtHrG7cuDE0VaIkXX/99S0+dvTo0froo48kSYZhtCjQCv/9ONsQvbUqKiqafS4Krv8Yy/Hjx6OOz8zMbNPfifZ63om1zmBNTU3ouTV8f58+fTRhwgTt2LEj1Leurk55eXmSYq8vGC41NVXf//73deedd2rr1q3asGGDNm3apI0bN4Yel6WlpXrssceUl5en3//+9zIMw3KOeCHuuRgxYoRuueWWFve/77779Oabb0qqXx/zt7/9rWWka1sLX08y3u9v5HNaZ/1dAwAAQPdBMAgAAIBOLTU11TLKrLCw0PJma2v069cvdDs8bGpK+Oi+lkwb2BzDMDRx4kRNnDhRX/nKVyTVryH24osv6p///Kek+uD0pZde0m233RZ1fHMhSGu1dQjSnDFjxoTCvJas2XjixAnLiMGmpn3cs2dPi+uorKy0hNOR07gGw1opekRPPOGPj7aaFjY8SAofRdYS//d//xe6PWbMmCbXLYs0bty40G3TNFVVVdVsYBEeUKSmpra80G7ubJ53wkc8xnveCa4zWFlZqR07dqikpETbt2+X1+uVFD0CcPbs2dqxY4d2796t4uJi7dmzJzSd77Rp06LWuovF6XRq2rRpmjZtmm677TYFAgFt2rRJf/nLX0Kj8D744AMtXbpUl19+ueXYloS4rTV79uwWB4MPPfSQXnnlldD2Aw88oKuvvrpN64kUHvDG+/0J/12TWhYe87sGAACAc8EagwAAAGi1G264QTt37rT89/jjj4f2P/vss5Z927dvV69evSRJl156adSxO3fu1G9+85u41wsPND755JOzrnvSpEmh2y1dlyl8rbTJkyef9bWbMnnyZD388MO68cYbQ23BN9m7m/CwInwaw3jC10PLyMjQiBEj2qWuSOEhSWlpaYuOKSkpCd0OPt7PVfiIyPDRg82pra3Ve++9F9q+7rrrWnXdyDVCWxK6hk+VOGTIkFZd72wNHTo06rkkfC3Gb33rW1H7gyPE+vbtqx07dkTtb+vfvbN53gnvF+95x+l0aubMmZLqw9sNGzZY1gyMDAbD1xn89NNPY65F2FoOh0OzZs3SH/7wB51//vmh9s72/PX444/r+eefD23/+Mc/tjzftpfwDz/E+/3NycmxhIad9XcNAAAA3QcjBgEAANAmgm8yu1yuqCnpdu7cqfLycknSrFmzWn3uCy+8MHT+f/zjH7rttttavFZauPCwY9u2bdqxY4cmTpwYt391dbXeeeedmMe3h0WLFoWmooy3/t7OnTvbtYb2tmjRIt13330KBALav3+/8vLylJOTE7f/66+/Hrp9ySWXRO2/8847deedd7bo2k8//bSeeeYZSfVByIsvvhi3b0ZGRuh2Xl6eamtrmx1RFR5at0WAuXbtWssIs9aENx988IHKysok1Yc3sabpbUpmZqamTJmigoKC0PmaGnHo8/m0cuXK0PZ5553Xquu1pfDAK/L5xjTNUNg/c+bMqOku28PcuXP1l7/8RZL00Ucf6fTp0+rfv3/c/idOnLBMZdvU887s2bO1atUqSfWPv2AQNWLECA0ePNjS97zzzpPD4VAgEND69estAdS5rrFnGIYuvvjiUCh7+vTpqD7BELej/fGPf9Sf/vSn0PZdd92lr33ta+1+3bq6utD6glL873FiYqIuuOACLV26VFL9uqlXXHFFk+cOrqsq2fu7BgAAgK6JEYMAAABoExs2bJBUPzomcsq04D7p7N7E/MIXvhA655EjR/TrX//6rGocM2aM5foPPvhgaNq9WJ544onQG9ypqalnNe1cXV1di9eACg+BmgoOurIBAwZoyZIloe1HH31UpmnG7Bu+NpzT6dRNN93UITVK0vz580O3y8rK9Oc//7nJ/nv27NEbb7wR2l6wYEFUn7q6OtXV1bXo+sXFxbrvvvtC22PGjLGsgdic8EB1/vz5Sk9Pb/GxQV/96ldDt//nf/6nyWkwX3jhBZ08eVJS/WjLK6+8stXXayvB5xu
n0xn1IYVdu3aFRoB2VKCyYMECDR06VFL9Y+Chhx6K29c0Tf3qV78KPS8NHz7c8liMFD4qcPXq1dqyZUtUe1BaWlrogxBr1qwJTaXb1PqCFRUVLX7Mhj8+wqdPtdPzzz+vJ554IrT9jW98Q3fcccdZn6816/k98sgjlilhr7nmmrh9w3/X3nvvvSbXKF26dKny8/ND261ZOxQAAACQCAYBAADQBs6cORMafRJrRGDwjfpevXo1OUIvnrS0NP3whz8Mbf/973/X97//fR0/fjxm/927d+tXv/pVKFQK95//+Z9yOp2huu68886o0S11dXV67LHH9Nxzz4Xavvvd7yolJaXVtZ88eVILFy7Ub3/7W8ubuZE+/vhjPf3006HtCy+8sNXX6iq+973vye12S6r/GfzoRz+KWlfrk08+0d133x3avvbaazV27NgOq3HixImWkVrPPPOMfv/734fWZAu3bt063XrrraF9ycnJ+tKXvhTV7+TJk1qyZIn++7//27J2ZTjTNLVy5Ur927/9mw4dOiSpfjTWj370IzkcLXv5VlRUZJlO82yDg2uuuSY0jWVFRYW+9rWvRY34Mk1TL730kn73u9+F2r761a/GXQe0sLBQEyZMCP0XXFezLQWnn504cWLU+mvhH1I4m9HLZ8PhcFgey2+99ZZ+9rOfRYVMFRUV+vGPf6z3338/1HbPPfc0+XOfMmVK6D4eOHAgFCjGG50WbD906FDo8ZqTkxN3NGxBQYEWLVqkp59+Ou4Ul36/X++8847+53/+J9TWGZ6/Xn31VT388MOh7X//93/XPffcc07nvPPOO/WTn/xEn332mQKBQMw+hw8f1l133WUZkXzllVc2OTJ61qxZoRHRPp9P3/zmNy3TKAe99957+tGPfmQ5b/hUtQAAAEBLMJUoAAAAztmGDRtCo75ivdm+ceNGSdL06dNbHG5E+vd//3ft3r1bL7/8siTp3Xff1fvvv6/s7GyNHDlSCQkJKi4u1rZt20KhS6wp+GbMmKG7775bjzzyiCRpxYoVWrhwoebMmaMhQ4aotLRU69evt6wXt2TJknOaeq6srEzPPvusnn32WfXp00eTJk1Senq6EhISdPr0ae3cuVOHDx8O9R85cqRlBImdXn75Zf3973+3tEWOsrz99ttDQV/QTTfdpJtvvjnmOceMGaP77rtPP/vZzyRJb7zxhlauXKm5c+cqJSVFe/fuDY1mkqQJEybopz/9aVvcnVZ58MEHddNNN+n06dMKBAJ66qmn9MILL2jGjBkaOHCgqqurVVBQYFnvyzAM/frXv447Yur48eP63e9+p9/97nfKzMzU+PHj1bdvX7ndbhUXF2vz5s2hkXdB99xzjy666KIW1/3mm2/K7/dLqg/jFy9efBb3vj7Qeuqpp3TTTTfp1KlTOnDggK677jpNnz5do0aNUm1trTZt2mQJOWfPnq3vf//7Z3W9tlBSUqLdu3dLUmj9vXDB56KUlJQODVSuvPJKbdiwQS+99JKk+imR33nnHc2ZM0cDBgzQ6dOntW7dOktAfsstt+jSSy9t8rzBdQY/+ugjS3usEYPB9vAPPEjNTyNaVFSkZ555Rs8884wGDhyoiRMnauDAgXI6nTp16pQKCgosj9lZs2bpqquuavKc7W3nzp36+c9/Hvp3KTk5WaZp6oEHHmjR8V/96lc1cuTIqHav16vXXntNr732WuiDLoMHD1ZKSoqqqqq0d+9ebd++3RIaTp06tUWj3B966CHdfPPN2rdvn06dOqUvf/nLmjJliiZMmKBAIKD8/HzLc83YsWP14IMPtuj+AAAAAOEIBgEAAHDOwkc2zJgxw7Lv4MGDKioqknTuU/fdf//9GjVqlJ566ilVVFTI7/crLy9PeXl5UX0Nw1BiYmLM89x2223q3bu3fvOb34Smygtf0yvI6XTq3//93/Vf//VfZ70WmdvtlsfjCU3HV1JSonXr1sXtP3v2bP2///f/oqZjtcupU6e0Y8e
OJvuEv1kdflxTbrzxRkn1b4ZXVVWptLQ0tMZWuHnz5umRRx6JGvnVEYYPH66XXnpJ9957b2iKxpKSEn344Ycx+/fr108PPvhgi4O4I0eOxB05KEnp6em67777Yq6t2JTwaUSvuOKKuL8HLTFs2DC98MILuueee1RQUKBAIKCNGzeGArZwN9xwg375y1/K5Yr/MjNy2tiz/aBAPM19SCE4YnD69OmhkcMd5Re/+IUGDBigP/7xj6EphmM9lhISEnTHHXfom9/8ZovOO3v2bEswOGrUKA0aNChm3/B1BsOPjycxMVEul0s+n09SfUgYfD6P5bLLLtNDDz3U5j/X1iopKbHcx6qqKv3tb39r8fGXXXZZzGAwXHl5ecxRfUFut1s333yzfvjDHza7Pqkk9enTR88995x+/OMfh0b8FhQUhNb5DHfRRRfp0UcfteV5EQAAAF0fwSAAAADOWfDN9jFjxkSNlGrrqftuueUWXXPNNXr99de1Zs0a7dmzR2fOnJEk9e3bN7SO4JVXXtnkG7s33nijLrnkEv3jH//QqlWrdODAAZWWliolJUWDBw/W/Pnz9fnPf/6cp69MT0/X+vXr9cknn2jDhg0qKCjQoUOHVFxcLK/Xq5SUFGVkZCg7O1tXXnllk+uJdTc33nijzj//fL366qtasWKFjh49qqqqKg0cOFBTpkzRNddco8WLF591KNsWRo0apVdeeUVr167Vu+++qy1btuj48eOqrKyUx+NR3759NXnyZF144YX63Oc+p6SkpLjnyszM1Jtvvqm8vDzl5uZq9+7dOnPmjEpKSlRTU6PU1FQNHDhQ2dnZuuCCC7RkyZKokZjN2bZtm3bt2hXavu666872roeMGTNGr7zyit577z29/fbb2rlzp06dOiW326309HTNmTNHn//855WVldXsucKnIu3Vq5cuvvjic64vXPjzTeSIwcOHD+vEiROSOm59wUjf+c53dO211+of//iH1qxZo8LCQpWXl6tXr14aNmyYFixYoBtvvFEZGRktPmfk6MCmgr5evXpp0qRJobDJ4/E0OcXltGnTtHbtWq1du1YbN27U9u3bdejQoVDwlpqaqmHDhiknJ0fXXHONpk6d2uK6u6InnnhCubm5ys3NVX5+voqKilRSUqKysjJ5PB716dNH48eP18yZM3XdddfFDWjjSU9P17PPPqsVK1bozTffDF3DMAwNHDhQM2fO1DXXXKN58+a10z0EAABAT2CYkR/ZBAAAAACgHfzqV78Krb32ve99T9/5zndsrggAAAAAehZ75/cAAAAAAPQY69evlyT1799ft9xyi83VAAAAAEDPQzAIAAAAAGh3p0+f1u7duyVJt99+u1JSUmyuCAAAAAB6HoJBAAAAAEC7W79+vUzT1JAhQ/SlL33J7nIAAAAAoEdijUEAAAAAAAAAAACgB2DEIAAAAAAAAAAAANADEAwCAAAAAAAAAAAAPQDBIAAAAAAAAAAAANADEAwCAAAAAAAAAAAAPQDBIAAAAAAAAAAAANADEAwCAAAAAAAAAAAAPQDBIAAAAAAAAAAAANADEAwCAAAAAAAAAAAAPQDBIAAAAAAAAAAAANADEAwCAAAAAAAAAAAAPQDBIAAAAAAAAAAAANADEAwCAAAAAAAAAAAAPQDBIAAAAAAAAAAAANADuOwuAADQfZSV1cjvD9hdBtpQ796Jcjod8vsDKiursbscdCM8ttAeeFyhPfC4QnvhsYX2wOOq+3I6HerdO9HuMgAA3QDBIACgzfj9Afl8frvLQDvhZ4v2wmML7YHHFdoDjyu0Fx5baA88rgAAQCxMJQoAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9
AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AASDAAAAAAAAAAAAQA9AMAgAAAAAAAAAAAD0AC67CwAAAB3H9PkUqKtVoLZOZm2tAnW1MmvrGtpqw9rqtyscAZk+vwy3S7UBhxwejwyPRw5PgoyEhq8eT317QkLD/oRQP8PBZ5AAAAAAAACAzoJgEACATsQ0TZl1dWHhXF19YFcXFuJFhHfBMK/+dsOxdQ39gsc23Jbf36H3x3C74weJHo8cCQmN+xu2CR8BAACA1vMWF2v/ay/Le7pYRkKCfIZTRkKiHAkeORIS6/+WDv1n3bb0czrtvisAAKAdEQwCANBKpt8fFbg1BnLBtrqIwM4a5gXq6hr61oW11Qd6Mk2772KbMb1emV6vApWV7XaNJsPHyKCR8FFSfQCtQEAyTZmBgBQI1H9taDfD95kBKdB4O26/OOeL2tdwjkCyRw6nQ6YMVVTV1X+PDYfkMEK3678akqPhtsMhI2K7vp/R2L/hmFB/R/05G/cbMoywfoZh948DAACgxcxAoP51RE1Nw3+1CtRUK1Bbq6NPP9Em1zBcrobQMLEhNAwPFIMhYkLYvhjhY2Ji/d/TiQlyeBq2XbwNCQBAZ8C/yACAHsdbfFrVO3Y0vICuiwjswkbi1YWPzmscuWf6fHbfBYTpVOGj21NfkxkjKAuGcU0Gamb0sWZAZiAY5DXeblFAFyfI607h8zkLDxrDAsWoEDJOUGkNIY1mA82oa8UNNI2GoLThtoIBpmn5EvxZhn6ilp9t7L7BBuvDoOm+ERdp6GLGbG/yPKEv4X3jnCfmeSNrNy3tpzwuGYYh0zRVV+er/zlICn7/Gjet7bLkwxFtDX2NqL6GZX/sfcEu8a4XfmzsfUarrmdENDd/PcPhkJzOqN8Bw+FsfPyFtYXCdkufsFA/1MdRH8I7HdZth0OG0/rYj9y2/O6E/w6h0/NXVKh09Sr5K8rlSEqSIzFRjsSGr+HbSY3t3e3DN0Ak0+erD/BqrSFeoKZGZqg97GtNY+hnhvY1HmfW1nZIzabP1/Z/Xzud1kDREytAjBc8xg8iDbebfycAAGgFgkEAQI9SsSVPR59+kmAkyDAsL8xDn/BtCMASe6XI6fHIX1ermvKqhmlOg6Mh6xqnPa2rk+n12n1v2k1HhI+wiWlKfr/Mhml2eWYAOqnIsD3WV6MhZIwRQjZ7bBNfS5I8crhcktOlWp9Z/wa0y1X/X8Nth8vduO12yQhuu4LbYW3h+7tRKGYGAjr86G9Ud6SwVccZCQkxwsPg7fptZ5xQMbyPkZBAMIBzFprWPzyMq6mp/9vXEt41BnZmTUSwFx7i1dTwocJwfr8CVVUKVFW17XlDr2kaA8T6wNFT/1zhSZARPnIxrN0zZIgSRozk+QMA0KMQDAIAepSSDz/scqGg4XaHRqWFh3eGJ6Hhk7Jhn7gN3W6YCjPyE7kJHksIaLhcTb4I7tcvRU6nQ35/QMXFTYdi9dMa1VmnSa0LCxCDozDD10CMCBcb+8Q+pjuHjwCAJtgY4pe158mdTkvI6IgVHjYXLgZvu91x+zjCw8zI87nDQs1zCCrrjhS2OhSUJLO2Vv7aWvlLz/rS9Qyj2fAwfru1j+Fi9FFXYQYCYQFddVgwFx3imbH2xQj/utprhab0WXRJw5rlNY0zpNTWNmzXt9uxDnmbM836n11Njc7mnvSef74G3/ofbV4WAACdFcEgAKBH8QwerKqtW9r2pJGj7oLr2FlG4nnCRuJZR+VFt4UFfp6
ELjOawHA4ZDR8yr+9tDp8jBNIEj62gVjTaQbXCAwfnRM+PWfYbZfLKal+ylOf12ed7jR8StVAxJSqZth0rIGA3d8FADg3DWFncGpA29+adziaCCOjw0VHWBjpKy2xt3bTVKC6WoHqaunMOZ7L6YwODBOT5ExKlJGYKGdiUsTUqJEjGsOmSu1Ea6qZwYA9EJACfpn+gMyAX/IHLG0K1Pcx/f76f3PDvjauJRw8rvFr+LH11zGt14l1rjjnjD5XoGEazbDpNmtr6tfn7umCj9eE+seeJyVZycOHKfOLN6rSkdSiU5g+X1hgGD9ADI2crKtVoKa2ca3DhlGWZkN7oK5rjZYsW/uxBt54k5y9etldCgAAHaLz/IUKAEAHGHD95yXDUM3ePTKczpiBnGWkXXAUXnjYZxmJ5+FT5R2oU4SPwTUnY4SPprdOkhF3fbmotefiBmoNa3oZjbcNI3xdrxjHxlsHz7JmWPQ+6xp68dfIs5ynDR7vrRmN2uTPyxIoRq7rGKPNEi6aYf0aA8hY6zZGr9UYoy1qLUnr+cOPsZy3BXVbRaw1F2ddu/ouzaxt18R6dVFr2zV5TSOyZ7N9FaNv1GMr6hzxz2MYhpKSPDIchsyAqaqqiDWY4qx3GDkypHHtxIgRIy1YvzF6rceWrvPY8vUZm13jsdnjw9Z4DH9c+hsfe5GPz3hfo9rM4Jv+Eeuc+v2NvxvdaCROt9Twb2BbhC3OXr3lycysDw2qG0Zy1VR3yPpo58zvV6Cysk2mEDfc7ojwsHHK1JK0XjIMQwGfT7XVtY2/T5GhWUSwFt4vbqAWHqwF+/L71ymEprFMSAw9JoyExPo19iLaHQ1T7RoNU2NGtscKn8P/xqps4d9Yhsslp8slZ0pKm95X0+8PrdMeDAwtAWIw7I07qrFx22xoC/7d3Zacqb1kJCa06TkBAOjMCAYBAD2KIyFBg754s91loBPriPARbccwjPqp+JxOu0tBJ9BWgTPajzUADw/J/dZAvWE7VltoZHH4dnjA2dR2U8FmnK+JHqcU8Mvv9aqmolqmz1e/9qzP1/CfV6Y37LbP17Dd2KcnSp0+Xelf/XpUe2jqx5pqBaprQtM4hoeH1tvx+3SF763p9crv9cpfHj0pbbkN9aCVwqeoTUgI/Y3YGM7FCvjC9sVo7yqzgbQFw+mUMzlZSk5u0/OGPsjXECqaDYFhY4hYEyNQjA4fzdoaufr2U78rr5bD7WnTGgEA6MwIBgEAAAAAHSI08tjuQlrhXANn0zRjh4heb1iQGCtojB0yml6vApYQMsY5mjqfz9chI8dSZ8yM2W44HHImJ9eHBefI9PmiAkN/dbXMmhr5LQFj88Ej01N3D4bLVT/CLjHBOvIuLKxrDOnC9yfIkZAkR2JD+Newz3AzM0hnxAf5AAA4NwSDAAAAAAC0E8MwZLjdktstqWXrfbWn0DpzMYLDQMxwMUZAaQkarSMmZTiUMi1HKVlT2/2+GC6XnKmpcqamntN5TNOsD1ybDA/jtNdGjGqsqek+U3Y6nQ1hvlOG0yHD4ZQivzoMGQ37Ff7V4agfzR/+tZlzGQ5H2DXDj6u/jsOT0BjiRU6vmdAwGq8TrekIAADQWfEXEwAAAAAAPYRhGJLL1S4BSm1trcrKSpTUt3+bn7s9GYZRv+6bxyOlpZ3TueqnOKwNjVL0V9fIrK2Rv7ohVKxtGL1YXS2P4ZfD4ZBpOFTr9TcGa0Z9KGYJyhq+RgdqTQRxkeeIOD48xAudPzzEAwAAQLdEMAgAAAAAAM5aeXmZCgq2aM+enQoE/EpKStZVV12v5OQUu0vrcPVTHCbJkZgk9enbZF/WRQUAAIAdCAYBAAAAAECrlZQUa+vWPO3fv7d+itIG1dVV2rt3l7Kzp9tYHQAAAIBYCAYBAAAAAECLnTp1Uvn5eTp8+EDcPqmpvTquIAAAAAAtRjAIAAAAAACaZJqmTpw4pvz8XB07diRuP4fDoSlTpmnkyDEdWB0AAACAliI
YBAAAAAAAMZmmqcLCQ8rPz9WpUyfj9nO5XBo/fpImT57aI9cWBAAAALoKgkEAAAAAAGARCAR08OA+5efnqaSkOG4/j8ejiROzNHFilhITEzuwQgAAAABng2AQAAAAAABIkvx+v/bu3aWCgs0qLy+L2y8xMUmTJ0/V+PGT5PF4OrBCAAAAAOeCYBAAAAAAgB7O6/Vq9+7tKijYourqqrj9UlN7acqUaRo7drycTt5SAAAAALoa/ooHAAAAAKCHqq2t0Y4dBdq+favq6mrj9ktL66OsrByNGjVWDoejAysEAAAA0JYIBgEAAAAA6GGqqqq0ffsW7dy5XT6fN26//v0HKjs7R8OGjZRhGB1YIQAAAID2QDAIAAAAAEAPUV5epoKCLdqzZ6cCAX/cfoMHZygrK0dDhmQSCAIAAADdCMEgAAAAAADdXEnJGW3dmqf9+/fINM24/YYOHa7s7OkaODC9A6sDAAAA0FEIBgEAAAAA6KZOnTqprVvzdOjQgbh9DMPQiBGjlZ2do759+3dccQAAAAA6HMEgAAAAAADdiGmaOnHimPLzc3Xs2JG4/RwOh8aMGa8pU6apd++0DqwQAAAAgF0IBgEAAAAA6AZM01Rh4SHl5+fq1KmTcfu5XC6NGzdJkydnKyUltQMrBAAAAGA3gkEAAAAAALqwQCCggwf3KT8/TyUlxXH7eTweTZyYpYkTs5SYmNiBFQIAAADoLAgGAQAAAADogvx+v/bu3aWCgs0qLy+L2y8xMUmTJ2dr/PjJ8ng8HVghAAAAgM6GYBAAAAAAgC7E6/Vq9+7tKijYourqqrj9UlJSlZWVozFjxsvl4uU/AAAAAIJBAAAAAAC6hNraWu3cWaDt2/NVW1sbt19aWh9lZeVo1KixcjgcHVghAAAAgM6OYBAAAAAAgE6surpK27bla+fObfL5vHH79e8/QNnZ0zVs2EgZhtGBFQIAAADoKggGAQAAAADohMrLy1RQsEV79uxUIOCP2y89fYiys6dryJBMAkEAAAAATSIYBAAAAACgEykpOaOtW/O0f/8emaYZt9/QocOVlZWjQYMGd2B1AAAAALoygkEAAAAAADqBU6dOauvWPB06dCBuH8MwNGLEaGVl5ahfv/4dVxwAAACAboFgEAAAAAAAm5imqRMnjik/P1fHjh2J28/hcGjMmPGaMmWaevdO68AKAQAAAHQnBIMAAAAAAHQw0zR15Mgh5efnqajoRNx+LpdL48ZN0uTJ2UpJSe3ACgEAAAB0RwSDAAAAAAB0kEAgoIMH92nr1jydOVMct5/H49HEiVmaODFLiYmJHVghAAAAgO6MYBAAAAAAgHbm9/u1d+8uFRRsVnl5Wdx+iYlJmjw5W+PHT5bH4+nACgEAAAD0BASDAAAAAAC0E6/Xq927t6ugYIuqq6vi9ktJSdWUKdM0duwEuVy8VAcAAADQPni1AQAAAABAG6utrdXOnQXavj1ftbW1cfulpfVRVlaORo0aK4fD0YEVAgAAAOiJCAYBAAAAAGgj1dVV2rYtXzt3bpPP543br3//AcrOnq5hw0bKMIwOrBAAAABAT0YwCAAAAADAOaqoKFdBwWbt3r1TgYA/br/09CHKzp6uIUMyCQQBAAAAdDiCQQAAAAAAzlJJyRlt3Zqn/fv3yDTNuP2GDh2urKwcDRo0uAOrAwAAAAArgkEAAAAAAFrp1Kkibd2aq0OHDsTtYxiGRowYraysHPXr17/jigMAAACAOAgGAQAAAABoAdM0deLEMeXn5+nYscK4/RwOh0aPHq+srGnq3TutAysEAAAAgKYRDAIAAAAA0ATTNLVv3z6tW/eJiopOxO3ncrk0btxETZ48VSkpqR1YIQAAAAC0DMEgAAAAAABx7NmzW+vXr1dRUVHcPh6PRxMnZmnixCwlJiZ2YHUAAAAA0DoEgwAAAAAARDBNU5s2faqCgs1x+yQmJmny5GyNHz9ZHo+nA6sDAAAAgLNDMAgAAAAAQBjTNJWXtyF
uKJiSkqopU6Zp7NgJcrl4WQ0AAACg6+AVDAAAAAAAYbZs2aT8/Nyo9rS0PsrKytGoUWPlcDhsqAwAAAAAzg3BIAAAAAAADfLzc7V588ao9osvXqShQ8fIMAwbqgIAAACAtsFHHAEAAAAAkFRQsFm5uZ9FtV966aWaNm0aoSAAAACALo9gEAAAAADQ423blq+NG9dHtS9atEjZ2dk2VAQAAAAAbY9gEAAAAADQo+3YUaANG9ZFtZ933nxNnTrNhooAAAAAoH0QDAIAAAAAeqxdu7br008/jmqfNWuuJk3KsqEiAAAAAGg/BIMAAAAAgB5p9+4d+uST1VHtM2bM1uTJU22oCAAAAADaF8EgAAAAAKDH2bt3l9atWxXVnpMzS1lZOR1fEAAAAAB0AIJBAAAAAECPsn//Hq1d+1FU+9SpMzR16gwbKgIAAACAjkEwCAAAAADoMQ4c2Kc1a1bINE1Le1ZWjqZNm2lTVQAAAADQMQgGAQAAAAA9wqFDB7R69QdRoeDkyVM1ffp5MgzDpsoAAAAAoGMQDAIAAAAAur3CwoNatWp5VCg4aVKWZs6cQygIAAAAoEcgGAQAAAAAdGtHjhzWypXLFAgELO0TJkzWrFnzCAUBAAAA9BgEgwAAAACAbuvo0UKtWPF+VCg4btxEzZ59PqEgAAAAgB6FYBAAAAAA0C0dP35UK1YsVSDgt7SPGTNec+deQCgIAAAAoMchGAQAAAAAdDsnThzThx++J7/fGgqOHj1W8+ZdSCgIAAAAoEciGAQAAAAAdCsnTx7XBx+8J5/PZ2kfOXKM5s9fKIeDl8IAAAAAeiZeDQEAAAAAuo1Tp07qgw/elc/ntbQPHz5KCxZcTCgIAAAAoEfjFREAAAAAoFs4fbpIy5a9I6/XGgoOGzZCF154CaEgAAAAgB6PV0UAAAAAgC6vuPh0QyhYZ2nPzByuCy9cTCgIAAAAACIYBAAAAAB0cWfOFGvZsrdVV1drac/IGKqFCxfL6XTaVBkAAAAAdC4EgwAAAACALquk5IyWLXtbtbU1lvbBgzO1cOGlcjpdNlUGAAAAAJ0PwSAAAAAAoEsqLS3RsmVvqaam2tKenj5EixZdJpeLUBAAAAAAwhEMAgAAAAC6nLKyUr3//luqrraGgoMGDdaiRZcTCgIAAABADASDAAAAAIAupby8rCEUrLK0DxgwSJdccrncbrdNlQEAAABA50YwCAAAAADoMioqKvT++2+pqqrS0t6//0AtXnyl3G6PTZUBAAAAQOdHMAgAAAAA6BIqKyv0/vtvqrKywtLer98ALV58pTweQkEAAAAAaArBIAAAAACg06uqqtT777+tiopyS3vfvv20ePGVSkhIsKkyAAAAAOg6CAYBAAAAAJ1adXWVli17W+XlpZb2Pn36asmSq5SYmGhTZQAAAADQtRAMAgAAAAA6rZqaar3//tsqLS2xtKel9WkIBZPsKQwAAAAAuiCCQQAAAABAp1RTU6Nly95WaekZS3uvXmlasuRqJSUl21QZAAAAAHRNBIMAAAAAgE6ntrZWy5e/rTNnii3tqam9dOmlVyk5mVAQAAAAAFqLYBAAAAAA0KnU1dVp+fJ3VFx82tJeHwp+TikpqTZVBgAAAABdG8EgAAAAAKDTCIaCp08XWdqTk1O0ZMlVSk0lFAQAAACAs0UwCAAAAADoFLxerz744F2dOnXS0p6UlKxLL71avXr1tqkyAAAAAOgeCAYBAAAAALbzer368MP3VFR0wtKelJSkyy67Wr17p9lUGQAAAAB0HwSDAAAAAABb+Xw+rVixVCdOHLO0JyYmacmSq9W7dx97CgMAAACAboZgEAAAAABgG7/fpxUr3tfx40ct7QkJiVqy5Cr16dPXpsoAAAAAoPshGAQAAAAA2MLv92vlyuU6dqzQ0u7xJGjJkqvUt28/myoDAAAAgO6JYBAAAAAA0OECgYBWrVquI0cOWdo9Ho+WLLlK/fr1t6kyAAAAAOi+CAYBAAA
AAB2qPhT8QIcPH7S0u91uLV58pfr3H2BTZQAAAADQvREMAgAAAAA6TCAQ0Jo1H+rQof2WdperPhQcMGCQTZUBAAAAQPdHMAgAAAAA6BCBQEAff7xSBw7ss7S7XC5dcsnlGjgw3abKAAAAAKBncNldAACg9crLy/Xss89q+fLlKiwslCSlp6dr5syZuuuuu5SezptqAACgczFNU+vWrdL+/Xss7U6nU4sWXa709CE2VQYA3YffH1DAlByG3ZUAAIDOimAQALqYPXv26Otf/7pOnjypESNG6IILLpDX69WhQ4f06quv6vrrrycYBAAAnYppmvrkk9Xau3eXpd3pdOriiy/T4MEZNlUGAN3Hqs1H9cqKPXK7HPr2DVM1PqO33SUBAIBOiGAQALqQsrIy3XrrrSopKdHvfvc7fe5zn7PsP3TokFJTU22qDgAAIJppmlq//mPt3r3D0u5wOLRw4aXKyBhqU2UA0H2cKq3WC+/tVMA0JUn//cZWPfrt+TZXBQAAOiOCQQDoQp555hmdOHFCP/7xj6NCQUkaPny4DVUBAADEZpqmPvtsnXbt2mZprw8Flygzc5hNlQFA97Jq89FQKAgAANAUh90FAABapra2Vv/85z+VlJSkL37xi3aXAwAA0CTTNLVx43rt2LHV0m4Yhi68cLGGDh1hU2UA0L34/AGt3nzM0jZnymCbqgEAAJ0dIwYBoAkVFRX6+OOPtX79em3btk0HDhxQeXm5EhISNGjQIE2dOlVXX321LrjgAhlG+67uvnXrVpWXl2vmzJlKSkrSunXrtHr1alVUVGjo0KFavHixRo8e3a41AAAAtIRpmsrN/Uzbtm2xtNeHgpdo+PCR9hQGAN1Q3u5TKq2ss7RdNpcPXwAAgNgIBgEgjr/+9a96/PHHVVtbG7XP5/Np//792r9/v9544w3NmjVLjz76qDIyMtqtnj179kiS+vfvr7vuuktLly617H/88cf1rW99S9/73vfarQYAAICW2Lx5o7ZuzbO0GYahBQsu1ogRfJAJANrSyrwjlu3Jo/ppxODeKi6utKkiAADQmREMAkAc+/fvD4WC6enpmj9/vqZMmaL+/furtrZWeXl5+te//qWqqipt2LBBX/nKV/TKK6+of//+7VJPaWmpJGnFihWSpHvuuUef+9zn5HQ69e677+qRRx7RH/7wB2VkZOjGG29slxoAAACas2XLJm3Zsimq/fzzF2rUqLE2VAQA3deJM1XaduCMpe3yeSPtKQYAAHQJBIMAEEf9p9oX6NZbb9W8efPkcFiXZb3++ut1++2367bbbtP+/ftVWFio3/3ud3r44YejznXvvfdqy5YtUe1NWbJkie6+++7QdiAQkCR5vV7deeed+sY3vhHa95WvfEU+n0+/+c1v9Ic//IFgEAAA2GLr1jzl5W2Iap8//yKNHj3OhooAoHv7KO+oZbtXslvnT22/mWwAAEDXRzAIAHH84Ac/UJ8+fZrsk5mZqSeeeELXXnutJOndd9/VL37xCyUlJVn6HTt2TPv372/V9YuKiizbycnJoduxgr8vfOEL+s1vfqOjR4/q8OHDGjZsWKuuBwAAcC62bduiTZs+jWqfO/cCjR07wYaKAKB78/oCWrPlmKVt0axh8rid8vsDNlUFAAA6O4JBAIijuVAwaOLEiRo1apT279+v6upqHTx4UBMnTrT0efHFF8+5nszMTEmSx+NRenp61P6UlBT169dPxcXFKioqIhgEAAAdZvv2rdqw4ZOo9tmzz9f48ZNsqAgAur+NO0+qotprabt0zgibqgEAAF2Fo/kuAIDmpKamhm4H1yVsa5MnT5Yk1dXVqbIyehF5v9+v8vJySdbRhQAAAO1p585t+uyztVHt5503TxMnTrGhIgDoGVZGTCM6aURfZQ5MjdMbAACgHsEgAJyjuro6HThwILSdkdE+6zkMGTJEU6bUv7m2fv36qP0bNmyQ1+tVUlKSRo8e3S41AAAAhNu9e4fWr18T1T5
z5hxNmpRtQ0UA0DMcOVWpXYdLLG0Lp2faUwwAAOhSCAYB4By99dZboZF6U6ZM0cCBA9vtWrfffrsk6ZFHHlFhYWGo/cSJE/r1r38tSfq3f/s3eTyedqsBAABAkvbu3aV161ZFtU+ffp6mTJlmQ0UA0HN8lHvEst072a3p4wbYVA0AAOhKDNM0TbuLAICuqri4WFdffbVOnz4tSXrmmWe0ZMmSdr3m/fffr5dfflnJycmaMWOGHA6HcnNzVV5erpycHD333HNKSkpq1xriYYH77sfhMGQYhkzTVCDAnwxoOzy20B54XHWcHTu267333otqnzt3nubOnWtDRe2HxxXaC48tnK3aOp++/qtlqqrxhdo+f/FYfeWKSTyuujmnkzEeAIBz57K7AADoqurq6nTnnXeGQsHFixe3eygo1QeDM2fO1EsvvaTc3Fz5fD6NHDlSV199tW655RYlJCS0ew3x8CKl+zIMQ06nYXcZ6IZ4bKE98LhqXzt27NDSpUuj2ufMmaP58+fJMLrn957HFdoLjy201tr845ZQ0DCky+eNtLwe43EFAADiYcQgAJyFQCCge++9V2+++aYkafjw4Xr11VeVlpZmc2X2YsRg98MnjtFeeGyhPfC4an+7d+/WO++8rciXkTNnztKCBQu6ZSjI4wrthccWzta9z6zWrkMloe0ZEwbpF7fNkcTjqrvjw7gAgLbAiEEAaCXTNHXfffeFQsGMjAz99a9/7fGhoCSVldXI5/PbXQbaUL9+KXI6DQUCpoqLK+0uB90Ijy20Bx5X7evQoQP66KNlUaHgpEnZmjx5us6cqbKpsvbF4wrthccWzsahE+WWUFCSzp+SHnoM8bjqvlwup/r2Tba7DABAN8DHTACgFUzT1P33369XXnlFkjR48GA9//zzGjp0qM2VAQAAtJ/CwkNatWp5VCg4ceIUzZo1t1uOFASAzmhl3lHLdt9eCZo6tr9N1QAAgK6IYBAAWsg0Tf3yl7/U3//+d0lSenq6XnjhBQ0fPtzmygAAANrP0aOFWrlymQIB65Th48dP0nnnzScUBIAOUl3r07qC45a2C6dlyOng7T0AANBy/OUAAC0QDAVffvllSdKgQYP0wgsvaMSIETZXBgAA0H6OHTuiFSuWKhCwThU+duwEzZnTPdcUBIDOav22E6qta3w+NgzpgqlDbKwIAAB0RQSDANCMyFBw4MCBeuGFFzRy5Eh7CwMAAGhHx48f1Ycfvie/3xoKjh49TvPmXUgoCAAdyDRNrcw7YmnLGTtA/Xon2lQRAADoqggGAaAZDzzwQFQoOGrUKJurAgAAaD8nThyPGQqOGjVW8+dfRCgIAB1s/7FyHTpRYWlbOD3TpmoAAEBXRjAIAE148MEH9be//U1SYyg4evRom6sCAABoP0VFJ/Thh+/K5/NZ2keMGK3zz18oB2tZAUCHW5lrHS04IC1RU0b1s6kaAADQlbnsLgAAOqvHH39c//M//yNJMgxDX/3qV7Vv3z7t27evyeMmT56sjIyMjigRAACgTZ06VaTly9+R1+u1tA8fPlIXXLCIUBAAbFBV49Wn209Y2i7KyZCD0dsAAOAsEAwCQBybNm0K3TZNU4899liLjnv44Yd1ww03tFdZAAAA7eL06VNavvztqFBw6NDhuuCCSwgFAcAma7ceV50vENp2OgwtmMqHUQEAwNnhlR0AAAAA9HBnzpzWsmVvq66uztKemTlMF120RE6n06bKAKBnM01TK/OOWtqmjx+otBSPTRUBAICujhGDABDHiy++aHcJAAAA7a6kpFjvv/+26upqLe1DhmRq4UJCQQCw0+7CUh09VWlpuziH0YIAAODsMWIQAAAAAHqo0tISvf/+26qtrbG0Dx6coYsvvkxOJ58lBQA7rcw7YtlO75esiSP62lQNAADoDggGAQAAAKAHKisr1fvvv6WammpLe3r6EF188WVyuQgFAcBO5VV12rDjpKXtomkZMgzDpooAAEB3QDAIAAA
AAD1MeXmZ3n//LVVXV1naBw5M16JFl8ntdttUGQAg6OP84/L5zdC2y+nQ+dmDbawIAAB0B3wEFAAAAAB6kBMnjmnNmhWqqrKuWTVgwCBdcskVcrs9NlUGAAgKmKY+iphG9LyJA9UrmedoAABwbggGAQAAAKCbM01TR48WKj8/VydPHo/a37//AC1efIU8Ht5wBoDOYMfBMzpxxjrV80U5mTZVAwAAuhOCQQAAAADopkzT1KFD+5Wfn6fi4lMx+/Tt21+LF18pjyehg6sDAMSzMtc6WjBzQIrGDU2zqRoAANCdEAwCAAAAQDcTCAS0f/8ebd2ap9LSkrj9+vXrr8WLr1JCQmLHFQcAaFJpRa1yd1s/zLFweqYMw7CpIgAA0J0QDAIAAABAN+H3+7Rnzy4VFGxWRUV53H7JySnKypqmceMmyunkZSEAdCartxyTP2CGtj1uh+ZNGWxjRQAAoDvhFSAAAAAAdHFer1e7dm3Xtm1bVF1dFbdfr15pysqaptGjx8npdHZghQCAlggETH2Ud9TSNntSupITeQsPAAC0Df6qAAAAAIAuqra2Vjt2bNX27VtVV1cbt1/fvv2UlTVdI0aMksPh6MAKAQCtsXV/sU6X1VjaLp6eaVM1AACgOyIYBAAAAIAuprq6Stu25WvXrm3yer1x+w0YMEhTp05XZuZw1qYCgC5gZe4Ry/aI9F4aObiXTdUAAIDuiGAQAAAAALqIiooKFRRs1p49O+T3++P2Gzw4U9nZORo8OINAEAC6iOKyGm3ee8rSdtF0nscBAEDbIhgEAAAAgE6urKxE+fl52rdvt0zTjNtv6NARys7O0cCB6R1YHQCgLazafFThT/GJHqfmTOL5HAAAtC2CQQAAAADopIqLTys/P1cHD+6L28cwDI0YMVrZ2Tnq27d/B1YHAGgr/kBAqzYftbTNmzJYSQm8dQcAANoWf10AAAAAQCdTVHRC+fm5Kiw8FLePw+HQ6NHjlJWVo9690zqwOgBAW9u857RKKuosbRflZNhUDQAA6M4IBgEAAACgEzBNU8ePH1V+fq6OHz8at5/T6dS4cZM0ZcpUpaSkdmCFAID2sjL3iGV7TEZvDU/vZVM1AACgOyMYBAAAAAAbmaapwsKDys/P1alTRXH7ud1uTZgwRZMmZSspKakDKwQAtKeTJdUq2F9saVs4PdOmagAAQHdHMAgAAAAANggEAjp4cJ/y8/NUUlIct19CQoImTcrWxIlT5PEkdGCFAICOsCrvqMyw7eQEl86bOMi2egAAQPdGMAgAAAAAHcjv92vfvt3aujVP5eVlcfslJSVrypSpGjduktxudwdWCADoKD5/QGu2WKePnp89WB6306aKAABAd0cwCAAAAAAdwOfzaffuHSoo2Kyqqsq4/VJTeykrK0djxoyX08kbwwDQnW3aVaSyKq+lbWEO04gCAID2QzAIAAAAAO2orq5OO3cWaPv2fNXU1MTtl5bWV9nZORo5cowcDkcHVggAsMvK3COW7fHD+ihjQIpN1QAAgJ6AYBAAAAAA2kFNTY22b8/Xjh0F8nrr4vbr33+AsrOna9iwkTIMowMrBADY6djpSu04VGJpWzg9w55iAABAj0EwCAAAAABtqKqqUgUFW7R793b5fL64/dLThyg7e7qGDMkkEASAHuijPOvagqlJbs0cP8imagAAQE9BMAgAAAAAbaC8vExbt+Zp795dCgQCcftlZg5TVtZ0pacP7sDqAACdSZ3Xr4/zj1naFkwdIreLqaQBAED7IhgEAAAAgHNQUlKs/Pw8HTiwV6Zpxu03YsQoZWVNV//+AzqwOgBAZ7Rh50lV1lhHlV+UwzSiAACg/REMAgAAAMBZOHWqSPn5uTp8+EDcPoZhaPTocZoyZZr69OnbccUBADq1lbnWaUSnjOyr9L7JNlUDAAB6EoJBAAAAAGgh0zR14sQx5efn6dixwrj9HA6nxo6doKysaUpN7dWBFQIAOrvCkxXac6TU0rZweqZ
N1QAAgJ6GYBAAAAAAmmGapo4cOaz8/FwVFZ2I28/lcmn8+MmaPHmqkpMZ+QEAiLYy74hlOy3Fo2ljmWYaAAB0DIJBAAAAAIgjEAjo8OEDys/PVXHx6bj9PJ4ETZw4RRMnZikxMbEDKwQAdCW1dX6tKzhuabtgWoZcTodNFQEAgJ6GYBAAAAAAIgQCAe3fv0dbt+aptLQkbr/ExCRNnjxVEyZMktvt6bgCAQBd0vrtJ1Rd6w9tG4Z00bQMGysCAAA9DcEgAAAAADTw+33avXunCgo2q7KyIm6/lJRUTZkyTWPHTpDLxcsqAEDLrMy1TiOaPbq/+qcx0hwAAHQcXsECAAAA6PG83jrt3Lld27dvUXV1ddx+vXunKSsrR6NHj5PDwbRvAICWO3C8TAeOl1vaFk7PtKkaAADQUxEMAgAAAOixamtrtGNHgbZv36q6utq4/fr27a/s7OkaPnwkgSAA4KyszD1q2e7XO0FTR/e3qRoAANBTEQwCAAAA6HGqq6u0bdsW7dy5XT6fN26/gQPTlZ09XZmZw2QYRgdWCADoTqpqfFq/7YSl7cJpGXI4+LcFAAB0LIJBAAAAAD1GRUW5Cgo2a/funQoE/HH7DRmSqezs6UpPH0IgCAA4Z59sO65ab+O/Ow7D0AVTM2ysCAAA9FQEgwAAAAC6vdLSEm3dmqd9+3bLNM24/YYNG6Hs7OkaMGBQB1YHAOjOTNPUytwjlrbp4waob68EmyoCAAA9GcEgAAAAgG7r9OlT2ro1VwcP7o/bxzAMjRw5RllZOerbt18HVgcA6An2Hi1TYVGlpe2i6YwWBAAA9iAYBAAAANDtnDx5XPn5uTpy5HDcPg6HQ2PGjNeUKdPUu3daB1YHAOhJIkcLDuyTqMkj+SAKAACwB8EgAAAAgG7BNE0dOHBAn3yyXkeOFMbt53Q6NX78JE2ePFUpKakdWCEAoKepqPbq0+0nLW0LczLlYP1aAABgE4JBAAAAAF3emTOntXTpv3TixPG4fdxutyZOnKJJk7KVmJjUgdUBAHqqtVuPy+cPhLadDkPnZw+xsSIAANDTEQwCAAAA6NKKi0/p/fffUl1dXcz9CQmJmjw5WxMmTJHH4+ng6gAAPZVpmlHTiM6cMFC9U/i3CAAA2IdgEAAAAECXVVJyRsuWvRMzFExOTtGUKVM1duxEud1uG6oDAPRkOw+V6HhxlaXt4umZNlUDAABQj2AQAAAAQJdUVlaqZcveVm1tjaU9LS1NkyZN1Zgx4+V0Om2qDgDQ063Ms44WHNI/WeOH9bGnGAAAgAYEgwAAAAC6nIqKCi1b9raqq60jMYYNG6Zrr71OZWW1NlUGAIBUVlmnjTuLLG0X5WTKMAybKgIAAKjnsLsAAAAAAGiNqqoqLVv2liorKyztgwcP0XXXXSeXi88/AgDstSb/mPwBM7Ttdjk0P2uwjRUBAADUIxgEAAAA0GXU1NRo2bK3VV5eZmnv16+/rrvuOnk8HpsqAwCgXsA09VHENKKzJw5SahLr3QIAAPsRDAIAAADoEurqarV8+dsqLT1jaU9L66vFi69SYmKiTZUBANBo24FiFZVY17+9aHqmTdUAAABYEQwCAAAA6PS8Xq8++OA9FReftrT36tVbS5YQCgIAOo+VuUct20MHpmpMRm+bqgEAALAiGAQAAADQqfl8Pq1YsVRFRScs7SkpqVqy5GolJyfbVBkAAFZnymuVt/uUpe3i6RkyDMOmigAAAKwIBgEAAAB0Wn6/Xx99tEzHj1tHXyQlJevSS69WamqqTZUBABBt9ZajCphmaDvB7dTcKYNtrAgAAMCKYBAAAABApxQIBLR69Qc6cuSwpT0hIVFLllylXr2Ylg0A0HkEAqZWbbZ+kGXO5HQlJbhsqggAACAawSAAAACATicQCOjjj1fq0KEDlnaPx6MlS65Unz597SkMAIA4tuw7reKyWkvbwukZNlUDAAAQG8EgAAAAgE7FNE2tX79G+/fvsbS7XG5dcsm
V6tdvgE2VAQAQ38rcI5btkYN7aeRgRrcDAIDOhWAQAAAAQKdhmqY2bFin3bt3WNqdTqcWLbpMAwcOsqkyAADiO1Varfy9py1tC6dn2lQNAABAfASDAAAAADqNvLzPtH37Vkubw+HQwoWXavBgpmMDAHROqzYflRm2nZTg1JxJ6bbVAwAAEA/BIAAAAIBOYcuWTcrPz7O0GYahCy9crMzMYfYUBQBAM3z+gFZvPmZpmzdlsBI8TpsqAgAAiI9gEAAAAIDttm3LV17ehqj2BQsu1vDhIzu+IAAAWihv9ymVVtZZ2hbmMI0oAADonAgGAQAAANhq167t2rBhXVT7vHkXatSosTZUBABAy63MO2LZHjs0TUMHpdpUDQAAQNMIBgEAAADYZt++3frkk9VR7bNnz9e4cRNtqAgAgJY7caZK2w6csbQtzGFNXAAA0HkRDAIAAACwxcGD+/Txxyuj2mfMmK2JE7M6viAAAFrpo7yjlu2URJdmTRhkUzUAAADNIxgEAAAA0OEKCw9p9eoPZZqmpX3q1BnKysqxpygAAFrB6wtozZZjlrbzs4fI43baVBEAAEDzCAYBAAAAdKhjx45o5cplCgQClvbJk7M1bdpMm6oCAKB1Nu46qYpqr6XtIqYRBQAAnRzBIAAAAIAOc/Lkca1YsVSBgN/SPn78JM2cOVeGYdhUGQAArbMy1zqN6MThfTSkf4pN1QAAALQMwSAAAACADnH6dJE++OBd+Xw+S/uYMeM1Z84CQkEAQJdx5FSldh0usbQtnJ5pTzEAAACtQDAIAAAAoN2dOVOsZcvekddrnXJtxIjRmjfvQkJBAECX8lHeEct272S3ZowfaFM1AAAALUcwCAAAAKBdlZWVaNmyt1VXV2tpHzp0uBYsuFgOBy9LAABdR63Xr7X5xy1tC6ZmyOXk3zMAAND58RcLAAAAgHZTUVGu999/WzU11Zb2IUMyddFFi+V0Om2qDACAs/PZ9pOqqm2cFtuQdGFOhn0FAQAAtALBIAAAAIB2UVVVqffff0tVVZWW9kGDBmvhwkvldLpsqgwAgLO3MmIa0Smj+2lQnySbqgEAAGgdgkEAAAAAba66ulrLlr2tiopyS3v//gO1aNHlcrvdNlUGAMDZO3SiXPuOllnaFuZk2lQNAABA6xEMAgAAAGhTtbU1Wr78HZWWllja+/Tpp8WLr5DH47GnMAAAztHKvKOW7T6pHk0b29+magAAAFqPYBAAAABAm6mrq9MHH7yrM2dOW9p7907TkiVXKiEh0abKAAA4N9W1Pq0rOG5pu3BahpwO3l4DAABdB3+5AAAAAGgTPp9PH374nk6dKrK0p6b20pIlVykpKdmmygAAOHfrt59QbZ0/tG0Y9cEgAABAV0IwCAAAAOCc+f0+rVixVCdPWkdSJCenaMmSq5SSkmpTZQAAnDvTNLUy94ilbdqYAerXm5HwAACgayEYBAAAAHBOAoGAPvpouY4ds75hmpiYpCVLrlKvXr1tqgwAgLax/1i5Dp2osLQtnJ5pUzUAAABnj2AQAAAAwFkLBAJas+ZDFRYesrR7PAlasuRKpaX1sacwAADa0Mo864df+vdOVNaofjZVAwAAcPYIBgEAAACcFdM0tW7dKh04sM/S7na7tXjxFerbt79NlQEA0Haqarz6dNsJS9tFORlyOAybKgIAADh7BIMAAAAAWs00TX366cfau3eXpd3lcmnRois0YMAgmyoDAKBtrd16XHW+QGjb6TB0wdQhNlYEAABw9ggGAQAAALSKaZratGm9du7cZml3OJxauPBSpacPtqkyAADalmma+ijvqKVt+rgBSktNsKkiAACAc0MwCAAAAKBVtmzZpIKCLZY2wzC0cOESZWQMtakqAADa3u7CUh05VWlpWzg906ZqAAAAzh3BIAAAAIAWKyjYrM2bN1raDMPQBRdcoqFDh9tUFQAA7WNl3hHLdnrfJE0c0demagAAAM4dwSAAAACAFtmxo0AbN66Pap8
//yKNHDnahooAAGg/5VV12rDjpKXtopxMOQzDpooAAADOHcEgAAAAgGbt2bNTn376cVT7nDkLNGbMeBsqAgCgfX2cf1w+vxnadjkNnZ/NOroAAKBrIxgEAAAA0KT9+/dq3bpVUe2zZs3VhAmTbagIAID2FTBNfRQxjeisiYPUK9ljU0UAAABtg2AQAAAAQFyHDx/UmjUfyjRNS3tOzixNnjzVpqoAAGhfOw6e0Ykz1Za2hTmZNlUDAADQdggGAQAAAMR09GihPvpoWVQoOGXKNGVnT7epKgAA2t/KvKOW7YwBKRo3NM2magAAANoOwSAAAACAKCdOHNOKFUsVCAQs7RMnTtGMGbNlGIZNlQEA0L5KK2qVu6vI0rYwJ4N/+wAAQLdAMAgAAADA4tSpk/rww/fk9/st7WPHTtB5583njVEAQLe2essx+QONo+U9LofmZw22sSIAAIC2QzAIAAAAIOTMmdNavvxdeb1eS/vIkWM0d+4FhIIAgG4tEDC1arN1GtHZk9KVnOi2qSIAAIC2RTAIAAAAQJJUWlqiZcveVl1draV92LARWrDgYjkcvHwAAHRvW/cX61RpjaVt4fRMm6oBAABoe7yyBwAAAKDy8jK9//5bqqmxvhmakTFUF164mFAQANAjrMw9Ytkenp6qUUN62VQNAABA2+PVPQAAANDDVVZWaNmyt1VdXWVpT08fooULL5XT6bSpMgAAOk5xWY027z1laVuYk8k02gAAoFshGAQAAAB6sOrqKr3//tuqqCi3tA8YMEiLFl0ml8tlU2UAAHSsVZuPyjQbtxM8Ts2ZnG5fQQAAAO2AYBAAAADooWpqarRs2dsqLy+1tPft21+XXHKF3G6PTZUBANCx/IGAVm0+ammbN2WwkhL4gAwAAOheCAYBAACAHqiurk4ffPCOSkrOWNrT0vpoyZIrlZCQYFNlAAB0vM17Tqukos7StjAnw6ZqAAAA2g/BIAAAANDDeL1effDBuzp92rqOUq9evbVkydVKTEyyqTIAAOyxMu+IZXt0Rm8NT+9lUzUAAADth2AQAAAA6EF8Pp9WrFiqoqITlvbk5BQtWXKVkpOTbaoMAAB7nCypVsG+YkvbwpxMm6oBAABoXwSDAAAAQA/h9/v10UfLdPy4dQ2lpKQkXXrp1UpNZWQEAKDnWZV3VGbYdlKCS+dNGmRbPQAAAO2JYBAAAADoAQKBgFav/lBHjhy2tCckJGjJkqvUu3eaTZUBAGAfnz+gNVusH5g5P2uwEtxOmyoCAABoXy67CwAAAJ1HwAzIG/DJG/DK6/eqtrxSAfllmlJZZbUMGTIMQ4YcDV8NOQwjdFvBbTW2WfrJkAxDjoj9ANqXaZpau/YjHTq039Ludnu0ePFV6tOnn02VAQBgr027ilRW5bW0XTS9a04jWnB6p/756b9UVHlaTodTDjnldDjkMBxyGs76/xwOOQynnE21OZwNxzT2cTiCtyP7NLRZtoP9HHI4nBHHBa9Vfzu4L35b47kBAEDbIBgEAKCTCpgB1fm98jUEdXUNYZ034LWEd3WBsDZ/7Nt1/rqG7Tp5G27XNez3BXzymj7VBfzyK2DLfQ3FisGwUIYMo749MkSMHTo6rG2GI6pf43nC+yqinyPG9RTjOGtd4YGos+HND5flq0tOh0Muwymnw9Xw1Rl6UyVWuyvia30/V1h/3iBBy5imqfXr12jfvt2WdpfLpcWLr1D//gNsqgxAd2OapkpqS1VeVyGP06MEp0eehv9chpMPA6FTWpl7xLI9flgfZQ5IsamasxcwA3p+60uq9NdIkvz+gCSv5Le3rrZiyAgLGmOHh5Y2R1ioaTgitoPHODQwaYDmDJmp3h6mUwcA9BwEgwAAtIA/4I8RyDXejtznbSbM8wa8DaGftyG0q2vcH/DJa2NIZwdTZv26LqYZ3ohmGDJCIaGr4U0Pl6MhPGwIGh0tCh6DwaU1eGxZcOkKvRkT6/jg+V0EmbYwTVMbNnyiXbu2W9q
dTqcWLbpcAwem21QZgO7on3ve0oeHV8fc5zAc9UGhozEwjPwavJ3giNyfELbtVoLDowRXQuhcTgdTPuLsHDtdqR2HSixtC3MyOuz6pmnKb/qjXieEXkvE+dBf+GuOukD9a4pT1cWhULA7MmXKZ/olf9snnRtPbtY9M7/LcwkAoMcgGAQA9CgBM6Dck/nadWaPakMvrsPDvOBoumBQV/9foAeFdOg6TJnymj55/T67S2mR+k96G3IaTrkdYSNHzOBeQzIa+wb3B6egjWxv2CM1jOxs7KvQSE4F/x/ZR7KMXDFinCtmTcFbhiLPFmoLa2loCFYTdmbLfYs+f8NhodbwEbXh0/HW93DIYTTeJ0fwaoahQGGVAkerI38Qco3rrbXlm2RU5MYYhRs5kjZ8JG/jdR0No2xjXdcR+jk4mjinGkf3ht0/6zTD1vsampI4eB0jdGY5DENnlCSX0ynTNFVeURv6vjoiz9nMKOQma41zf4CerrS2XCsOr4m7P2AGVO2rUbXaPrhwGs7GUDEYMkYEkFHhY0SfYACZ4HQ3hpAOQsfu7qO88LUFTaUkOzVpdKpKakvlC/iswVz4B/wCPvnCPizoC30g0Nf4+iJOmFfXMGNH8AOEJp+Gs93h8iOq9tUo1dP1RooCAHA2CAYBAD3KioMr9c9979ldBtAj1X/S25TPDKg24G3+AJyTIWV9NLzMOk2oKVO7+x3XmYo9UoVNhXVTkdMMK8Z27KmK4wSOcUPK5oLM4Dkc1gA5Ti0Oo3HtKcMwQlOw1bcboX2OhvMFp19zOBq3G/sGp3Br3A6uUWUE15oK+y9yO16bQ9H7CGM7n6KqItsCDr/pV7WvWtW+6uY7t5LL4QqNYKwPDN2hEYyeiNGNMcPHsJAxsr2tmA0zLoR//82wWRjM8D2mpVfodn3/4HnC/m9aW0JHmBHXi9m/iXrMyCMbbpvhrWGVmPWjxbwRYZxlFo8mgrlQGBfxwcBTvkolzghIDr8MR/1HAX+y7p3Y3+gu6PbCM5IkvyEFDEN+NXw1pIAkv2EoYEh+NbQZUqDhdnBfsJ/12GCf+rZQn4b+wWuFzh/qE15HY7tp83N6Zo1XCV3kg3YAALQFgkEAQI+y/cBau0s4J4ZpymVKbtOUO2DKbZpymabcpuq/NrRFbgePcYXtd5umXIZTbsMpt8Mlt8Mtj8Mtl8Mtj8sjt9OjlMRkud1umQFTtXV1MgMBmWZAAdOUaQZkmn6ZodsBmYGAAobZ0K+hXWH7ZIb1Nxv21W8HpMZ2M9DwplEg9LaQKck0wm7LiNgO7jdi9K1/wyH2uWL0b2pf5LnC9gXU+OaHL+zNDp/R+AaLP+zNF394u6zHBd84Ac5GenlazFBwb78TOpNUaVNV3ZvZ8PwWqN9AOzNkxA0WrSGkMyzgtG47DGd9uOkI26fGfc6GgDMpKUEuh1OGHPLW+iNCyrCgNE6IGVwTK2a7Ea898n454+7vLMpKC+0uoV34Aj75Aj5V+qra/Nxuh1sOwwg9f0iNIVjwdlPhGtqAOzRwv9uZXVqtyXNvkSSZAb/k90kBvxTw128H/FLAF7Ydtj+yr9l4fOOxwf2+2Nv+huNaIBQahgWUjWFlY6gY/Ns6EPY3dcBQRFgZ7GMNNOvPaQ1DA4bUx+fXDK9bLndS+/wgAADohAgGAQA9yrg6aXsb/evnCA/lQoGbdTvefncguK8htAv2V/2bRB7DWR/QOd1yOzxyOd3yuBLkdLrlcCVIbo8Ml0dyuuu/ujwynA1fXR7J1dDu9ETsd8twJUgud/2xzbyh2K9fipxOh/z+gIqL7QkTTNNseIMsIAUC9bfNgNQQRioQkGQ27AtE7GvmONPabt0X3m62/LioN1zCt2O9yRL/DRoz4Jc/4Jff9Mtn+uUPBOQ367f9agwcA0bD7WYCx8bQsrE9+Glvv+X44L6GoLLhjRNfWHv98RHXIsjsFAZW9tLI0oFR7fv7Ful0MsME0T2E1ppq4ZvO3ZUhI2Z
gGBk6Ro7YdLYodAwbAeoI2xcVftb3O3hym6W2YTVe3Xq0RHWGoTqHoTrDkNdhqLbhdp3DkNcwVOeQah0OeQ2F+sX8Gna7u3xwxsvoecThCL5uCCjsNUP9duNrjOjt4OuKgV6fZgybKff4Bbbej/q/48P+HvZHhoixAsqGbX+s0DFW3/q2mNv+2P3Dz+voM0CeaVfWv2YCAKCHIBgEAPQoi8ZdqV5r/z8dTHTLqYhQLiy0s7zAVv2IOo+zYTSd0yO30y2XK6ExcHNHB3H1t92NYZ0lpHNLroSo/Qpfdw2SGtZiMwxJDikix+zJ36n6ILIlb6w0sz/Gp79jhphN7W/4RLgZ8CkQ8MvX8J/f9NWHmgG//GYw1AzIZwbkV324Ggj4G6Y1Cx8FUb8+XfhYCDPeVyOyjxHdz4h9rPU8MY6LdduIt99obAsbRRqv/uj7Z71+5LXCj48c/Rp+PZ+vr/y1gxQl4YhG+4o1qjTW6Fvr/W9q5Kyi9kePno1bp+rD6fDvV2QdajguYDlP+P54I4WbqrMnP1OguzNlhj4w0tkipl5+Uyl9hynFVyfT75WCX2u95xzo+qRQsFjrsAaPdQ1t3rghY/wA0tuNQke0HVfYB/pifQAwclaO0GuIQNj+0Ewe1u3QTCBh/V2mqaiVLQ2j4bWDJ+yrp+EDf57GdneCklJT5Bk4VMmTFuhMmb3PDPV/x7skR/3bj/x2AQDQORAMAgB6FPeoGZrb/0Gdd3KvZDgbgrmEhqAuPNRrGFnndMtwdJ5puoAgw3BITofkdNdv21xPa0WORjWDoy/DR34GAo0BaNg+0/TXjwY1I9oj+qmhnxnRTwF/xOjPxmuYAeu2pS2ilsbagp+GDx892tAePG/oepE1R1+v/txhxwePa8ZRRz996h7WEKQ3muw9qAk1R9rrR9klmJH/NTtdsDWsNRvWTFLwmKbCyRj7gscFwvo2ntuIU1/sKYul+inRrGFq09ePd1+D5wlOuRacri04TVtwbalQe8NUbfHag98nf9S2EQqFg2tLmRHnIsDtfnon9FbK5x6Iuc8M+BuDQl+dTH+d5PNKfq9MX53kr5Pp80b0qf8qv1duX52SIvvEODa8LWxRu7hM1Y+ODw8Z441abLKtIYD0NmzXNoSYPM7PXfisG+HBWvgIusYQz5SrmRF2kTN7BD80GD4Cr/lXAobkjhHaeTzRYZ47VqjXGO5Ftbnrv8rhavGHB/uG/Y2lTveRAQAA0BkQDAIAehxH70Fy9I4xogaAbQzDIRmMCm2KGRFuhoehR48e1WfrPrasPyVJU8aMUs64S+oDSEv46G8MN4OhpiXEDAskowLaOCGqpd2v6CA2XpAbHrZaawn1s4S9DbdbwVDEY8mMuGFG9DbCvhqGZDoabjc8QA0jdNuI0RarnwxHwymN0G3J0XgNxbiG6kdMGzHarNcIa5cRNtLaiN4ffjs4VbMZ4+cfN3QP+zlHPqZMM7pvcLrnZgR7NRc+Nra3PLBsUXtEH0uoqeBo14bpmyPPFb7mVQvPH97HH9Gnu4RH41xpcfcZDqfkSZKhjlvTywz4JF9EeNgQSEYGlKE+fq9lu75vnGPqGsLNUFBpDSPDQ8fwQNHyfYl124xsN2P3NZs4R+Rts/GAyD7R/Zo6l9lEndG349dpRvWNdX1HxP6WMaLCONPp0d4T1arxO1Upl7ymU0PS+2rYsAGhIC5WaBcM9yxt7oRWhXYAAACdAcEgAAAA0AWERomGt0k6fvyoPlq/ToGIUHDSpCzNmDWvW75ZaYav8RkWQPVJS5DDMBQwpTMlVfFDu1DwFxHQBUM1tKmoEcGRo3Yjw+HwNVtjjsL1hwXT1sC46RDTbLpvZAjesM/tUn1/v0/e2roY0ys3vWZWS0aqhYsMSS1hZAtCzpjhZVgffxPn8YeN7LT0jzpPw1ejcR3a4PmcpqnJlbXKGTe/jR5BbcNwuCSPS4anY8JIM/h49dfVB4vBUNHvDY1oTE1yyGFIgYC
p8orqhtDMjHjMmI3t9ScOazfDL9jYHr4dMV13eHt0//APS8S6XrDd2q+xDLOZesPvU/x6rffdbGg2ZThdoekyw2f6qA/qYoR2wVlAIp7XV20+qud27QhtG5J+e9k8JaZ1XFANAABgJ4JBAAAAoIsqKjqhDz98T36/darR8eMnaVY3DQWl4JpFTklOhS/E5ExpnD7NUctLnc4i1ojgrvTIjJz6uLWi1oT1xwoSm1rX1RfzdnN9FfA1sd5sWH9/WH9/E/1bsS6gI22wPJMWtvp71Z0YhiE5XZLTJcOTHLNPcthjq+YsHls4OytzrdNrZ4/prwGEggAAoAfh1TIAAADQBZ0+fUrLl78rn89naR89epzmzFnQbUNBoKvp6mvCBpnBEZf+ZkJHh1OOtMGs0YxO6cDxMh04Xm5pW5iTaVM1AAAA9iAYBAAAALqYkpJiLV/+trzeOkv7iBGjNH/+RYSCANpc/Uhdl+RwSUrosgEneraVuUct2/16J2jqmP42VQMAAGAPPsIHAAAAdCFlZaVatuxt1dbWWtozM4drwYJFcjBKBwCAKNW1Pq3fdsLSduHUDDkcxNwAAKBn4V0DAAAAoIuoqCjX+++/perqakv74MEZWrhwsZxOZ5wjAQDo2T4pOK5ab+NamQ7D0AXTMmysCAAAwB4EgwAAAEAXUFVVqWXL3lZVVaWlfeDAdF188WVyOlklAACAWEzT1IrcI5a2nHED1LdXgk0VAQAA2IdgEAAAAOjkamqqtWzZ2yovL7O09+8/QJdccoXcbrdNlQEA0PntPVqmwiLrB2sW5jBaEAAA9EwEgwAAAEAnVltbq2XL3lFpaYmlvU+fvrrkkivl8XjsKQwAgC7io4jRggPSEjV5VD+bqgEAALAXwSAAAADQSXm9dfrgg3d15sxpS3vv3mlasuQqJSYm2lQZAABdQ0W1V5/uOGlpWzg9Uw7DsKkiAAAAexEMAgAAAJ2QaZpauXKZTp2yvpmZkpKqJUuuUlJSsk2VAQDQdazdelxeXyC07XQYWpA9xMaKAAAA7EUwCAAAAHRCO3du07Fj1qnPkpKSdemlVyslJdWmqgAA6DpM09TKiGlEZ04YqN4pTMMNAAB6LoJBAAAAoJOprKzQpk2fWtoSExN16aVXq1ev3jZVBQBA17LrcImOF1dZ2hbmZNpUDQAAQOdAMAgAAAB0IqZpav36NfL5vJb2Cy9crLS0PvYUBQBAF7QiYrTg4H7JmjC8jz3FAAAAdBIEgwAAAEAncuDAXhUWHrK0jR07UYMHZ9hUEQAAXU9ZZZ027iyytC3MyZBhGDZVBAAA0DkQDAIAAACdRE1NjT77bK2lLSkpSTNnzrGpIgAAuqaP84/JHzBD2y6nQ/Ozh9hYEQAAQOdAMAgAAAB0Ehs3fqKamhpL2+zZC5SQkGBTRQAAdD0B09TKPOs0orMnDVJqktumigAAADoPgkEAAACgEzh6tFB79+6ytA0fPlIjRoyyqSIAALqmbQeKVVRi/aDNwpxMm6oBAADoXAgGAQAAAJt5vV598slqS5vb7dHs2efbVBEAAF3XR7lHLdtDB6ZoTGZvm6oBAADoXAgGAQAAAJvl5W1QRUW5pW3mzDlKTk6xqSIAALqmM+W1yt19ytK2cHqmDMOwqSIAAIDOhWAQAAAAsNGpUye1Y8dWS1t6+hCNGzfRpooAAOi6Vm85qoBphrY9bofmTh5sY0UAAACdC8EgAAAAYJNAIKC1a1fJDHsD0+Fwat68CxjZAABAKwUCplZttk4jOndyupITXTZVBAAA0PkQDAIAAAA22bo1TyUlxZa2adNmqnfvPvYUBABAF7Zl32kVl9Va2hZOz7SpGgAAgM6JYBAAAACwQWlpibZs2WRp69u3v6ZMmWpTRQAAdG0rc49YtkcM7qWRg3vbVA0AAEDnRDAIAAAAdDDTNLVu3SoFAoFQm2EYmjfvQjkc/IkOAEBrnSqtVv7e05a2ixktCAAAEIV3HQAAAIAOtmv
Xdp08edzSNmlStgYMGGhTRQAAdG2rNh+TGbadlODU7EmDbKsHAACgsyIYBAAAADpQVVWlNm1ab2lLTe2lnJxZNlUEAEDX5vMHtHrzUUvb3CmDlehx2VQRAABA50UwCAAAAHQQ0zT1ySdr5PV6Le1z514gl4s3LwEAOBt5u0+ptLLO0rYwh2lEAQAAYiEYBAAAADrIwYP7VVh40NI2Zsx4ZWQMtakiAAC6vo/yjli2x2amadigVJuqAQAA6NwIBgEAAIAOUFtbo08//djSlpiYpFmz5tpUEQAAXd+JM1UqOHDG0rZweoZN1QAAAHR+BIMAAABAB9iw4RPV1FRb2mbPPl8JCYk2VQQAQNf3UZ51bcGURJdmTRhkUzUAAACdH8EgAAAA0M6OHi3U3r27LG1Dh47QiBGjbKoIAICuz+sLaM2WY5a287OHyON22lQRAABA50cwCAAAALQjn8+nTz5ZbWlzu92aM+d8GYZhU1UAAHR9G3edVEW119J2UQ7TiAIAADTFZXcBAICWWb9+vb761a8222/OnDl64YUXOqAiAEBL5OVtUEVFuaVtxow5SklJtakiAAC6h5W51mlEJw7voyH9U2yqBgAAoGsgGASALmLAgAG6/vrr4+5ftmyZKioqNHv27A6sCgDQlNOni7R9e76lbdCgwRo/fpJNFQEA0D0cPVWpXYdLLG0Lp2faUwwAAEAXQjAIAF3EmDFj9Jvf/CbmvmPHjumNN96QYRi69tprO7gyAEAsgUBAa9eukmmaoTaHw6F58y5kClEAAM7Ryrwjlu1eyW7NGD/QpmoAAAC6DtYYBIBu4I033lAgENCsWbM0bNgwu8sBAEgqKNiiM2dOW9qmTp2htLQ+9hQEAEA3Uev1a23+cUvbgqlD5HLyNhcAAEBz+IsJALqB//u//5MkXXfddbbWAQCoV1ZWqs2bN1ra+vTppylTptlUEQAA3cdn20+qqtZnabsoh2lEAQAAWoKpRAGgCRUVFfr444+1fv16bdu2TQcOHFB5ebkSEhI0aNAgTZ06VVdffbUuuOAC26aF27x5s/bv36+kpCRdfvnlttQAAGhkmqbWrVulQMAfajMMQ/PnXyin02ljZQAAdA8fRUwjmjWqnwb1SbKpGgAAgK6FYBAA4vjrX/+qxx9/XLW1tVH7fD6f9u/fr/379+uNN97QrFmz9OijjyojI6PD6wyOFlyyZIlSU1M7/PoAAKvdu3foxIljlraJE7M0YMAgmyoCAKD7OHSiXHuPllnaGC0IAADQcgSDABDH/v37Q6Fgenq65s+frylTpqh///6qra1VXl6e/vWvf6mqqkobNmzQV77yFb3yyivq379/h9VYV1end955R5J0/fXXd9h1AQCxVVVVauPG9Za21NReysmZZVNFAAB0Lyvzjlq2+6R6NG1sx70GAwAA6OoIBgEgDsMwtGDBAt16662aN2+eHA7rsqzXX3+9br/9dt12223av3+/CgsL9bvf/U4PP/xw1LnuvfdebdmypVXXX7Jkie6+++4m+6xYsUIlJSUaPHiw5s6d26rzAwDa3qeffiyvt87SNnfuBXK73TZVBABA91Fd69O6guOWtgunZcjldMQ5AgAAAJEIBgEgjh/84Afq06dPk30yMzP1xBNP6Nprr5Ukvfvuu/rFL36hpCTr+hbHjh3T/v37W3X9oqKiZvsEpxG99tpro4JLAEDHOnhwvw4dOmBpGz16nDIyhtpTEAAA3cz67SdUWxe+hm99MAgAAICWIxgEgDiaCwWDJk6cqFGjRmn//v2qrq7WwYMHNXHiREufF198sc3rKy4u1urVqyVJ1113XZufHwDQcnV1tfr00zWWtsTERM2aNc+migAA6F5M09TK3COWtmljBqhf70SbKgIAAOiaGF4CAG0gNTU1dDu4LmF7e+utt+T1epWTk6PRo0d3yDUBALFt2LBe1dXVlrbzzpuvxETerAQAoC0cOF6uQycqLG0LpzNaEAAAoLUIBgH
gHNXV1enAgQOh7YyMjnlxGpxGlNGCAGCv48ePas+eHZa2zMzhGjlyjE0VAQDQ/ayIGC3Yv3eCskb1///Zu+/guNP7zvOfX3ejgUbOAAEQBHPOBEByOBxOpEZjnSXZstcnS5Z09m55dyWXymHrfD7ZssvW3Ule+2odyiuvJY20SvbZsqw0iTmCOXNIAgSJRCJndPz97g+QIB4wAUT4dQPvV9UU0Z9On9G0CKC//TyPS20AAAASF1uJAsAk/ehHP1JfX58kafXq1SooKJj257x+/bouXbokv9+vD37wg9P+fOOVyTY+s47HY438mZub5nIbzCaz5bUVjUb1wx+aW4gmJSVp9+5XlZmZ/ph7YbrMltcV4guvK0wXXlvj1z8UUc2Vu0b2gW0Vys/ne+1YvK4AAMDTMBgEgEno7OzUV77ylZHLv/mbvzkjz/sv//IvkqSXXnpJWVlZM/Kc4+H1shB9trIsS16v5XYNzEKJ/to6fPiYuru7jez5559XTk62K30wLNFfV4hPvK4wXXhtPd3Bs00KR+yRy16Ppde2VvD7xxPwugIAAI/DYBAAnlE4HNZnP/tZdXR0SJJeeeUVvfrqq9P+vLFYTP/2b/8mSfrIRz4y7c83EbGY/fQbIaF4PJYsy5LjOLJtx+06mEVmw2urtbVVJ0+eNLJ58+Zp7dp1/H3oktnwukL84XWF6cJra3wcx9FPj9QbWfXqYmWl+fl++wi8rmY3huEAgKnAYBAAnoFt2/r93//9kTeEy8vL9Wd/9mcz8txer1cHDx6ckeeaqN7eoKLRmNs1MIVyc9Pk9VqybUednQNu18EskuivLdu29bOfvSXHefCGm8fjUWXlDnV1DbrYbG5L9NcV4hOvK0wXXlvjc62hW7fv9hnZttVF/G/2GLyuZi+fz6ucnFS3awAAZgE+ZgIAE+Q4jv7wD/9wZNVeSUmJvva1r8XVlp4AgOl1+fIFdXa2G9natRuVnZ3jUiMAAGan/WebjMuFOQGtXMD3WwAAgGfFYBAAJsBxHP3RH/2Rvv/970uSiouL9Y1vfENlZWUuNwMAzJTe3h6dO2duIZqdnaM1aza4UwgAgFmqbzCsE1fbjGzXhlJ5LM7OAwAAeFYMBgFgnBzH0Re/+EV997vflSQVFRXpzTffVHl5ucvNAAAzxXEcHTt2ULGYuW3ytm075fV6XWoFAMDstO9Mk6KjzhH0eS09t7bYxUYAAACJj8EgAIzD/aHgd77zHUlSYWGh3nzzTS1YsMDlZgCAmXTjxvu6c6fZyFasWKOCgiKXGgEAMDtForbeO21uI1q5olAZqX6XGgEAAMwODAYB4CnGDgULCgr05ptvqqKiwt1iAIAZNTQ0qFOnjhlZWlq6Nm7c4lIjAABmr2OX76h3IGxkr1WyWwsAAMBkMRgEgKf44z/+44eGggsXLnS5FQBgptXUHFY4bL5BuXXrDiUlsXIBAICp5DiO3j7RYGQryrO1oDjDpUYAAACzh8/tAgDmNtu2df78eZ0/f15Xr15VU1OT2tvbNTQ0JEkKBALKz89XaWmpVqxYoXXr1mndunXyeGbmcw1/8id/om9/+9uSHgwFFy1aNCPPDQCIH7dv1+vWrZtGtnDhEpWWsnIBAICpdqm+U01tA0a2u4rvuQAAAFOBwSCAGReLxXTw4EH98Ic/1KFDh9TX1/fE29fV1RmXMzIytGPHDn3oQx/Szp075fV6p6XnX/zFX+hb3/qWJMmyLH3yk59UXV3dQ33GWrVqlUpKSqalEwBg5oXDYR0/fsjIkpOTVVm5zaVGAADMbm/VmKsFi3NTtXZxnkttAAAAZhcGgwBmTG9vr77zne/of/7P/6m2tjZJw1vEPMvj/PSnP9VPf/pT5efn61d/9Vf17/7dv1NWVtaU9j19+vTI147j6M///M/Hdb8vfelL+uhHPzqlXQAA7jl9+riGhgaNrLJyu1JSAi41AgBg9mps7delm51G9lrVfHksy6VGAAAAswuDQQD
TbmhoSP/wD/+gr3/96+rv75f0YCBoWZYWLVqkFStWaPHixSoqKlJOTo4CgYAcx1EwGFRnZ6fu3r2ruro6Xb16VXV1dSP3b2tr01/+5V/qq1/9qj796U/r05/+tFJTU137dwUAzC5377bo2rUrRlZSMl8LFy5xqREAALPb2LMF0wNJ2r662KU2AAAAsw+DQQDT6kc/+pG+/OUvq7W1dWSYl5aWpl27dunVV19VdXW1cnJyJvSYXV1dOn78uN555x3t379f/f396u/v11/91V/pe9/7nv7Lf/kveuONNybd/Zvf/OakHwMAkLhisaiOHj1gZD6fT1u37pDFqgUAAKZcT39Ixy7fMbKXNpXKnzQ9x0cAAADMRQwGAUyr3/md3xn5et26dfqVX/kVvf7660pJSXnmx8zJydEHPvABfeADH1AoFNJPfvITfec739H58+fV2tqq3/3d352SwSAAYG47f/60ent7jGzjxkqlp2e41AgAgNntvdONisYeHDfh83r04qYyFxsBAADMPgwGAUy7qqoq/af/9J9UXV095Y+dnJysj3zkI/rIRz6i48eP66//+q914sSJKX8eAMDc0tnZoYsXzxlZfn6hli9f7VIjAABmt1Akpr2nm4xs2+oiZaX5XWoEAAAwOzEYBDCtvvrVr+r555+fkeeqrq5WdXW1Dh06NCPPBwCYnWzb1tGjB0a2wJaGz8Tdtm2nPB6Pi80AAJi9jlxo0UAwamSvVZW71AYAAGD24p0NANNqpoaCo+3YsWPGnxMAMHtcvXpRHR1tRrZ27Ubl5OS61AgAgNnNdhy9faLByNYuylNpfppLjQAAAGYvBoMAAADAPX19vTpzxtySOisrW2vXbnSpEQAAs9+5G+262zVkZK9VzXepDQAAwOzGYBAAAACQ5DiOjh07qFgsZuTbtu2U1+t1qRUAALPfWzXmasGygnStWpDjUhsAAIDZjcEggIQRiUTU3t6uaDT69BsDADBBdXXX1dLSZGTLl69SYWGxS40AAJj9brb06lpDt5Htrpovy7LcKQQAADDL+dwuAGBua2gY/mSo3+9XUVHRI29z69YtfelLX9Lhw4cVjUbl8Xi0bds2/Zf/8l+0dOnSmawLAJilhoYGdeLEUSNLTU3Txo1VLjUCAGBuGHu2YFa6X9WrHv27IQAAACaPFYMAXHP+/Hm99tpreu211/R3f/d3j7xNS0uLfvmXf1n79+9XJBKR4ziKxWI6dOiQfumXfknnzp2b4dYAgNnoxIkjCodDRrZ16w75/X6XGgEAMPt19AR14kqrkb2yuUw+L29XAQAATBd+0gLgmn379slxHEnSRz/60Ufe5ktf+pK6u7sfed3Q0JB+93d/V5FIZLoqAgDmgIaGW6qvrzOyiorFKitb4FIjAADmhndPNci+9zuhJPmTPHphQ6mLjQAAAGY/BoMAXHN/tV9OTo7WrFnz0PV3797VO++8I8uylJKSoi9/+cs6deqUfvSjH43cvqGhQT/96U9ntDcAYPYIh8M6fvyQkfn9yaqs3O5SIwAA5oahUFQHzjUb2Y6185QeSHKpEQAAwNzAYBCAaxoaGmRZllasWPHI6999992RFYW/8Ru/oQ996ENKS0vTkiVL9OUvf3nkdnv27JmRvgCA2efMmRoNDg4YWWXlNgUCAZcaAQAwNxw816yhUGzksiXp1cr57hUCAACYIxgMAnBNe3u7JKmo6NEHyx8/fnzk61/4hV8wrlu4cKHWrFkjx3F05cqV6SsJAJi1Wlvv6P33LxvZvHmlWrRoqUuNAACYG2K2rXdONhrZxmUFKspJdakRAADA3MFgEIBrQqGQJCklJeWR158+fVqWZWnJkiWPHB7Onz/8adL7A0YAAMYrFovp6NEDRubz+bR16/OyLMulVgAAzA2n3m9TR2/QyF5jtSAAAMCMYDAIwDV+v1+SNDg4+NB1t2/fHhn4bd68+ZH3z8zMlCQFg8FHXg8AwONcuHBGPT3dRrZhwxZ
lZGS6UwgAgDnCcRy9VdNgZAvnZWppWZZLjQAAAOYWBoMAXJOXlydJqq2tfei6gwcPjny9cePGR96/v79f0uNXHAIA8ChdXZ26cOGMkeXlFWjFijUuNQIAYO640dSjmy29Rra7aj4r9gEAAGYIg0EArlm5cuXIGYG3bt0yrvvBD34w8nV1dfUj79/YOHwmRWFh4bR1BADMLrZt6+jRA3IcZySzLEvbtu2Ux8OPxgAATLexqwXzMlO0eXmBS20AAADmHt79AOCaV155RdLwm7T/+T//Zx07dkzvv/++vvjFL+rChQuyLEvr1q1TcXHxQ/eNRCJ6//33ZVmWFi5cONPVAQAJ6v33L6m9vdXI1qxZr9zcPJcaAQAwd9ztGtSZa21G9uqWMnn5cA4AAMCM8bldAMDc9cYbb+jv/u7vdPPmTd24cUOf/vSnH7rNb/zGbzzyvkePHlUwGBwZHgIA8DT9/X06c+aEkWVmZmnduk0uNQIAYG5590SjnFGXA8lePb++xLU+AAAAcxEfyQLgGp/Pp7/+679WUVGRHMcx/pGkX/3VXx1ZVTjWv/7rv458/bitRgEAuM9xHB07dlDRaNTIt23bKa+Xz8oBADDd+ociOnih2ch2ri9RIJnvwwAAADOJn74AuGrhwoX68Y9/rP/v//v/dPLkSQ0MDKi4uFivv/66duzY8cj7dHV16eLFiyopKVFaWpo2bNgws6UBAAnn5s0bam5uNLJly1aqqGieS40AAJhb9p9tUjhij1z2WJZe2TzfxUYAAABzE4NBAK5LS0vTJz/5SX3yk58c1+1zcnL01ltvTXMrAMBsEQwO6cSJI0YWCKRq0yZWnAMAMBOiMVvvnjI/oFO5slB5WSkuNQIAAJi72EoUAAAAs9qJE0cVCoWMrLp6h/x+v0uNAACYW45fvque/rCRvVbJakEAAAA3MBgEAADArNXYeFs3b94wsgULFqq8vMKdQgAAzDGO4+itmgYjWzY/WwvnZbrUCAAAYG5jMAgAAIBZKRIJ69ixg0bm9/tVVfWcS40AAJh7rtzqUmNbv5HtrmK1IAAAgFsYDAKYVl//+tcViURm7PkikYi+/vWvz9jzAQDi15kzJzQ4OGBkmzdvVSCQ6lIjAADmnrGrBYtyAlq/JN+lNgAAAGAwCGBa/V//1/+l119/Xf/4j/84rQPCcDis733ve/rABz6g//v//r+n7XkAAImhre2url69ZGTFxSVasmS5S40AAJh7mtoHdKGuw8heq5wvj2W51AgAAAA+twsAmN0sy1JTU5O+8IUv6P/9f/9f/dIv/ZJ+4Rd+QaWlpVPy+E1NTfrHf/xH/dM//ZM6OjrkOI68Xu+UPDYAIDHFYjEdOXLAyLxer7Zt2ymLNyIBAJgx75y4bVxOS/Fp+9p5LrUBAACAxGAQwDT7x3/8R/3Jn/yJzp07p/b2dv3t3/6t/vZv/1br1q3Tyy+/rK1bt2rNmjXyeMa3gNm2bV28eFHHjh3Te++9pwsXLshxHDmOI0nasGGD/s//8/+czn8lAECcu3jxrHp6uoxs/fotysjIdKkRAABzT89AWEcu3jWyFzeVKjmJD3ICAAC4icEggGm1Zs0afe9739NPfvIT/dVf/ZXq6uokSefPn9f58+clSSkpKVq8eLEWLVqk4uJiZWdnKyUlRY7jKBQKqaurS3fu3NHNmzdVW1urYDA48vj3B4KLFi3SZz/7Wb3++usz/y8JAIgb3d1dunDhjJHl5uZr1aq1LjUCAGBu2nu6UdGYPXLZ57X08qYyFxsBAABAYjAIYIZ88IMf1Ac+8AG98847evPNN3Xq1KmR64aGhnTp0iVdunTpCY8w7P4g8L7Nmzfr137t1/Tqq6+yPRwAzHG2bevo0QOy7QdvQlqWpW3bdo57ZToAAJi8cCSmPaebjGzrqmJlpSe71AgAAAD3MRgEMGM8Ho92796t3bt3q76+Xv/2b/+mvXv36sqVKw8N/B7HsiytXLlSL774oj7
0oQ+poqJieksDABLGtWuX1dZmblm2atU65eXlu9QIAIC56cilO+ofihjZa5XzXWoDAACA0RgMAnBFRUWFPvvZz+qzn/2surq6dP78eb3//vtqbGxUR0eHhoaGJEmBQED5+fkqKyvT8uXLtXbtWuXk5LjcHgAQb/r7+3X69Akjy8jI1Pr1m11qBADA3GQ7jt6uaTCy1QtzVVaY7lIjAAAAjMZgEIDrcnJy9MILL+iFF15wuwoAIAE5jqPjxw8qGjVXJmzbtlM+Hz/uAgAwky7UduhO56CR7a5itSAAAEC84LAVAAAAJLT6+lo1NZkrE5YsWaHi4hKXGgEAMHe9VXPbuFxakKbVFbkutQEAAMBYDAYBAACQsILBoGpqjhhZIBDQli3VLjUCAGDuunWnT1dvdxvZa5XzZVmWO4UAAADwEAaDAAAASFgnTx5VKBQ0sqqqHfL7k11qBADA3PX2CXO1YGaaX1tXFbvUBgAAAI/CoSsA4sqVK1d06tQptbS0qLe3V7FYTH/2Z3/mdi0AQBxqampQXd11Iysvr9CCBQtdagQAwNzV2RtUzZVWI3t5U6mSfHwmHQAAIJ4wGAQQF372s5/pr/7qr1RbWzuSOY4jy7IeGgy2t7frwx/+sGKxmDZv3qy/+qu/mum6AACXRSIRHTt20MiSkvyqqnrOpUYAAMxt751qVMx2Ri77fR69uKnMxUYAAAB4FD62BcB1X/jCF/T5z39etbW1chxn5J/Hyc/P17Zt29TV1aU9e/bo7t27M9gWABAPzp49oYGBfiPbvLlaqalpLjUCAGDuCoaj2ne22cieWztP6YEklxoBAADgcRgMAnDVX/zFX+j73//+yDBwx44d+p3f+R1VV1c/8X4f/vCHJQ2vKty/f/8MNAUAxIu2tlZduXLRyIqK5mnp0hUuNQIAYG47eL5FQ6HoyGVL0quV890rBAAAgMdiMAjANfX19fof/+N/SJIyMzP1jW98Q3//93+vX//1X9eSJUueeN+tW7cqEAhIko4fPz7tXQEA8SEWi+noUfMDIR6PV9u27ZRlWS61AgBg7rJtR++caDCy9UvyVZyb6lIjAAAAPAmDQQCu+d73vqdoNCrLsvQnf/InT10lOJrX69Xy5cvlOI5u3LgxjS0BAPHk0qVz6u7uMrL16zcrMzPLpUYAAMxtp6+1qb0naGS7q1gtCAAAEK8YDAJwzbFjxyRJ5eXl2r1794TvX1paKkm6c+fOlPYCAMSnnp5unT9/2shycvK0evU6lxoBAIC3Ttw2Li8oztCy+dnulAEAAMBTMRgE4Jrm5mZZlqV1657tDd309HRJ0sDAwFTWAgDEIcdxdPToAdm2PZJZlqXt23fK4+FHWgAA3HCjqUe1Tb1GtrtqPtt7AwAAxDHeRQHgmsHBQUlSauqznT0RDA5vV5OcnDxlnQAA8enatStqbTVXiK9cuVZ5eQUuNQIAAG/XmKsFczOTtWV5oUttAAAAMB4MBgG4Jjs7W5LU1dX15Bs+xu3bw7+E5ubmTlUlAEAcGhjo1+nTx40sPT1DGzZscakRAABo6x7SqWttRvbK5vnyeXmrCQAAIJ7x0xoA15SXl8txHJ0/f37C9+3q6tLFixdlWZZWrFgxDe0AAPHAcRwdP35IkUjEyLdt2ymfz+dSKwAA8M7JBjnOg8spfq92ri9xrxAAAADGhcEgANc899xzkqS7d+/q3XffndB9//t//+8jbxJv3759yrsBAOLDrVt1amw0tylbvHiZ5s0rdakRAAAYDEZ08HyLke1cX6LUFD60AwAAEO8YDAJwzUc/+tGR8wG/+MUvqrGxcVz3+5d/+Rd9/etfl2VZyszM1M///M9PZ00AgEtCoaBqao4YWUpKQFu2bHWpEQAAkKT9Z5sVCsdGLluW9MrmMhcbAQAAYLwYDAJwTXFxsT7zmc/IcRy1t7frF3/xF/Wtb33rkWcOhkIhHT16VJ/73Of0+7//+3Lu7Vnzuc99TqmpqTNdHQA
wA06ePKZgcMjIqqqeU3JyikuNAABANGbr3VPmhzq3LC9UfnbApUYAAACYCPZ4AOCqz33uc6qtrdXbb7+tnp4e/emf/qn+9E//VElJSSO3qaysVH9//8jl+0PBD3/4w/r4xz8+450BANOvublRtbXXjKysbIEWLFjoUiMAACBJJ662qqsvZGS7q8pdagMAAICJYsUgAFdZlqW//Mu/1G/+5m/K4/HIcRw5jqNIJCLLsiRJfX19I7njOPJ6vfrP//k/60tf+pLL7QEA0yESiejYsYNGlpSUpK1bd4x8bwAAADPPcRy9VWOe/bukLEuLSjJdagQAAICJYsUgANd5PB791m/9ln7xF39R3/jGN3TgwAHV19c/dLt58+Zp165d+sxnPqP58+fPfFEAwIw4d+6U+vv7jGzTpmqlpqa51AgAAEjS+7e7dftuv5HtrmS1IAAAQCJhMAggbpSWlur3f//39fu///vq7u5WW1ub+vr6lJqaqry8PBUUFLhdEQAwzdrb23TlygUjKyws1rJlK11qBAAA7hu7WrAwO6CNS/NdagMAAIBnwWAQQFzKzs5Wdna22zUAADPItm0dPbp/5CxZaXhV+bZtO9lCFAAAl7V0DOhcbYeRvVo5Xx4P36MBAAASCWcMAgAAIC5cunROXV2dRrZu3SZlZWW7UwgAAIx450SDcTktxacda+e51AYAAADPisEgAAAAXNfb261z504bWXZ2rlavXu9SIwAAcF/vYFiHL94xsl0bS5Xs97rUCAAAAM+KwSAAAABc5TiOjh49KNuOjWSWZWn79p3yennDEQAAt+073aRI1B657PVYemlTmYuNAAAA8Kw4YxBAXDh58qR+8pOf6Pz582psbFR/f79isdjT76jhN48vX748zQ0BANPl+vWrunu3xchWrFij/PxClxoBAID7ItGY9pxuNLLqVUXKyUh2qREAAAAmg8EgAFd1dXXp937v93To0KGRzHEcFxsBAGZSf3+/Tp06ZmTp6RnasGGLS40AAMBoRy/dVe9gxMheq5zvUhsAAABMFoNBAK4JhUL65Cc/qRs3bjAMBIA5yHEc7d27R5GI+Wbj1q3PKykpyaVWAADgPsdx9PaJBiNbuSBH5UUZLjUCAADAZDEYBOCar3/967p+/bosy5LX69VHP/pRvfHGG1q2bJmysrI4VwoAZrnr16+rtrbWyBYtWqqSEs4sAgAgHly82anm9gEj211V7lIbAAAATAUGgwBc89Of/nTk66985St6/fXXXWwDAJhJwWBQe/bsMbKUlBRt2bLNpUYAAGCst2puG5dL8tO0dlGuS20AAAAwFTxuFwAwd92+fVuWZWndunUMBQFgjjl48KAGBswVCJWV25WSkuJSIwAAMNrtu326XN9lZK9VzpdlWS41AgAAwFRgMAjANT7f8KLlZcuWudwEADCTWlqadOnSRSMrLS1XRcVilxoBAICx3hlztmBGapK2rS5yqQ0AAACmCoNBAK4pLS2VJIVCIZebAABmSiQS0bFjB43M50vS1q07WIEAAECc6OoL6djlu0b20qYyJfk4Bx4AACDRMRgE4Jrnn39ejuPo/PnzblcBAMyQmprD6uvrNbJNm6qUlpbuUiMAADDWntONitnOyOUkn0cvbip1sREAAACmCoNBAK75+Mc/Lr/fr1u3bundd991uw4AYJrV1l5Tbe01I5s3b56WL1/lUiMAADBWKBzTvjNNRrZ9TbEyU/0uNQIAAMBUYjAIwDVFRUX6whe+IMdx9L//7/+7ampq3K4EAJgmPT3dOn78kJH5/X7t3v0BthAFACCOHLrQooFg1Mheq5zvUhsAAABMNZ/bBQDMbb/4i7+o5ORk/eEf/qE+9alP6ZVXXtFrr72mJUuWKCMjY9xvFpeUlExzUwDAs4pGo9q//11Fo+abjK+++qqys7PV2TngUjMAADCabTt650SDka1bnKd5eWkuNQIAAMBUYzAIwHU/93M/p5aWFv3X//pf9c4
77+idd96Z0P0ty9Lly5enqR0AYLJOnjyq7u5OI1uzZq1WrFihWMx2qRUAABjr7I12tXYPGdnuqnKX2gAAAGA6MBgE4KqWlhb9xm/8hmpra0dWBzqO85R7AQASxc2btbp27YqRZWfnaNeuXe4UAgAAj/VWzW3jcnlhulaUZ7tTBgAAANOCwSAA1/T09OjjH/+4mpubjTwQCCgzM1Ner9elZgCAqdDb26Njxw4Ymc/n086dr8jn48dQAADiSV1zr6439hjZ7qpyzgIGAACYZXhHBoBr/v7v/17Nzc2yLEspKSn6D//hP+iNN97Q/PkcbA8AiS4Wi+nAgfcUiUSMvLp6h7Kzc1xqBQAAHuftE+ZqwZyMZFWuLHSpDQAAAKYLg0EArnn33XclDZ8R+NWvflVbtmxxuREAYKqcOnVMnZ3tRrZ48TItXrzMpUYAAOBx2nuGdPJqm5G9vLlMPq/HpUYAAACYLvyEB8A1TU1NsixLmzdvZigIALPI7ds3dfXqJSPLyspWVdVzLjUCAABP8u7JRtmjznpPTvLqhQ0lLjYCAADAdGEwCMA1gUBAkrRw4UKXmwAApkp/f5+OHNlvZF6vVzt3vqKkpCSXWgEAgMcZDEZ14Jx57vvz6+YpLYXv2wAAALMRg0EArikpGf4E6sDAgMtNAABTwbZtHTjwnsLhsJFXVm5XTk6uS60AAMCTHDjXrGA4NnLZsqRXKjn3HQAAYLZiMAjANS+//LIcx9Hp06fdrgIAmAKnT9eovb3VyCoqFmvp0hUuNQIAAE8Ss229e6rByDYtK1BhdsClRgAAAJhuDAYBuOaXf/mXlZWVpTt37ui73/2u23UAAJPQ2Hhbly+fN7KMjExt3fq8LMtyqRUAAHiSk1fb1NkbMrLdVeUutQEAAMBMYDAIwDUFBQX68z//cyUnJ+tP//RP9c///M9uVwIAPIOBgX4dPrzXyDwej3bufEV+v9+lVgAA4Ekcx9FbNbeNbHFJppaUZrnUCAAAADPB53YBAHPXiRMnlJycrM9//vP68z//c/0f/8f/oW9+85vavXu3li5dqoyMjHGvMqmsrJzmtgCAR7FtWwcP7lEoZK422LJlq/Ly8l1qBQAAnuZ6Y4/q7/QZGasFAQAAZj8GgwBc84lPfMIY/DmOo6tXr+rq1asTehzLsnT58uWprgcAGIdz506ptfWOkZWXV2j58tUuNQIAAOMxdrVgflaKNi0rcKkNAAAAZgqDQQCuchzniZcBAPGrublRFy6cMbL09Axt3/4C5woCABDH7nYO6uz1diN7dct8eTx8/wYAAJjtGAwCcA3bfwJA4hoaGtShQ+a5gpZl6fnnX5bfn+xSKwAAMB5vn2zQ6I9kBpJ92rFunmt9AAAAMHMYDAJwzTe/+U23KwAAnsH9cwWDwSEj37SpWgUFhS61AgAA49E/FNHh8y1GtmtDiQLJvEUEAAAwF3jcLgAAAIDEcvHiWd2502xkZWXlWrVqrUuNAADAeO0906Rw1B657PVYenlzmYuNAAAAMJMYDAIAAGDc7txp1rlzp4wsNTVN27fv4lxBAADiXCRqa8+pRiOrXFmo3MwUlxoBAABgpjEYBAAAwLgEg0M6eHCPHOfBqUT3zxVMSeENRQAA4t3xy3fVMxA2st2V5S61AQAAgBsYDAIAAOCpHMfRoUP7NDQ0aOQbNmxRUVGxS60AAMB4OY6jt0/cNrIV5dlaUJzhUiMAAAC4gZOlAUyr5mbzDKqSkpLHXjcZox8XADD1Ll06p+bmBiObN69Ua9ZscKcQAACYkMv1XWpsGzCy16pYLQgAADDXMBgEMK1eeumlkTOnLMvS5cuXH3ndZIx9XADA1GptvaMzZ04YWSAQ0I4dL3KuIAAACeKtGnO1YHFuqtYtznOpDQAAANzCYBDAjBh9HtVErgMAuCsUCurAgfce+rt6x46XFAikutQKAABMRGNbvy7e7DSy1yrny8MHfAAAAOYcBoMAptWTtvhk+08AiG+
O4+jw4f0aHDS3HVu3bpPmzSt1qRUAAJiot0+Y24GnB5K0fQ1nBAMAAMxFDAYBTKs9e/Y803UAAPddvXpRjY23jKyoaJ7WrdvkUiMAADBRPf0hHbt0x8he2lQqf5LXpUYAAABwk8ftAgAAAIg/7e2tOnXquJGlpKTo+edfksfDj5AAACSK9043KRp7sCW4z+vRi5vKXGwEAAAAN7FiEIBrTpw4IUkqKipSeXn5hO/f0NCgO3eGP/laWVk5pd0AYC4Lh8M6cOA92bZt5M8996JSU9NcagUAACYqFIlp35kmI9u2ukhZaX6XGgEAAMBtDAYBuOYTn/iELMvSxz/+cf3BH/zBhO//7W9/W1//+tdlWZYuX748DQ0BYO5xHEdHj+5Xf3+fka9Zs0GlpfNdagUAAJ7FkYt31D8UMbLXKvl+DgAAMJcxGASQ0BzHefqNAADjdu3aFd26ddPICgqKtGHDFpcaAQCAZ2E7jt4+0WBkaxblqrQg3aVGAAAAiAccEAMAAABJUmdnu06cOGpkfn+ynn/+Zc4VBAAgwZy/0aG7nYNGtrtq4kc4AAAAYHbhHR4ACSsYDEqSkpOTXW4CAIkvErl/rmDMyJ97bpfS01lZAABAonmr5rZxuawgXasW5LjUBgAAAPGCwSCAhHX/XMGcHH65BYDJcBxHx44dUm9vj5GvXLlW8+cvcKkVAAB4VvV3evV+Q7eR7a6aL8uy3CkEAACAuMEZgwBmRHNz82OvGxgYeOL1o0WjUd29e1c/+9nPdO7cOVmWpRUrVkxVTQCYk27ceF83b94wsry8Am3aVOVSIwAAMBlv15hnC2al+1W9qsilNgAAAIgnDAYBzIiXXnrpkZ9OdRxHP/jBD/SDH/zgmR/7537u5ybRDADmtu7uTtXUHDaypCS/du58WV6v16VWAADgWXX2BlVzpdXIXtlcJp+XTaMAAADAYBDADHMcZ1zZeL3xxht64403JlMJAOasaDSq/fvfUyxmniu4fftOZWRkutQKAABMxrsnG2WP+h3Ln+TRCxtKXWwEAACAeMJgEMCMKCkpeShrbm6WZVlKTU1VVlbWUx/DsiwlJycrOztbS5cu1e7du7V9+/bpqAsAc0JNzWH19HQZ2fLlq7RgwSKXGgEAgMkYCkW1/1yTke1YO0/pgSSXGgEAACDeMBgEMCP27NnzUHb/bMCPfOQj+oM/+IOZrgQAc1pd3XXduPG+keXk5GnLlq0uNQIAAJN18HyLhkIPdgKwJL1aOd+9QgAAAIg7bDAPwFWT2UYUAPBsenu7dezYQSPz+ZL0wgsvy+vlc2MAACSimG3rnRMNRrZhab6KclJdagQAAIB4xDs/AFzz3nvvSZLS09NdbgIAc0csNnyuYDQaNfKtW59XZma2O6UAAMCknb7Wro7eoJHtrip3qQ0AAADiFYNBAK4pLS11uwIAzDknThxTV1eHkS1ZslyLFi1xqREAAJgsx3H0Vs1tI1s4L1NLy55+ljsAAADmFrYSBQAAmCPq6+t07dplI8vOzlFV1XMuNQIAAFOhtqlXdc29Rra7ar4sy3KpEQAAAOIVg0EAAIA5oK+vV0eP7jcyr9ernTtfkc/HJhIAACSysasF8zJTtHl5gUttAAAAEM8YDAIAAMxysVhMBw68p0gkYuTV1TuUnZ3jUisAADAVWrsGdfpam5G9uqVMXg9v+QAAAOBh/JQIAAAwy50+fVwdHeYbhosWLdXixctcagQAAKbKOycb5Yy6HEj26vn1Ja71AQAAQHxjMAgAADCLNTTU68qVi0aWmZml6uodnDsEAECCGwhGdOh8i5HtXF+iQDLbhAMAAODRGAwCAADMUv39fTp82DxX0OMZPlcwKSnJpVYAAGCq7DvTpFAkNnLZY1l6ZfN8FxsBAAAg3jEYBAAAmIVs29bBg3sUDoeMvLJym3Jz81xqBQAApko0Zuu9U41GtmVFgfKyUlxqBAAAgETAYBAAAGAWOnPmhNra7hrZggWLtGz
ZSpcaAQCAqVRz5a66+8NGtruq3KU2AAAASBQMBgEAAGaZpqbbunTpnJGlp2do27adnCsIAMAs4DiO3qppMLJl87O1cF6mS40AAACQKBgMAgAAzCKDgwM6dGifkXk8Hu3c+Yr8fr87pQAAwJS6eqtLDa39Rra7krMFAQAA8HQMBgEAAGaJ++cKhkJBI9+8eavy8wtcagUAAKbaWyfM1YJFOQGtX5rvUhsAAAAkEgaDAAAAs8T586d1926Lkc2fv0ArVqx2qREAAJhqze0DOl/bYWSvVc6Xh+3CAQAAMA4MBgEAAGaBlpYmnT9/2sjS0tK1ffsLnCsIAMAs8vaY1YJpKT5tXzvPpTYAAABINAwGAQAAEtzQ0KAOHtxjZJZlaefOl5WcnOJSKwAAMNV6B8I6cvGOkb24qVTJSV6XGgEAACDRMBgEAABIYI7j6NChvQoGh4x848YqFRQUudQKAABMhz2nGxWN2SOXfV5LL28qc7ERAAAAEg2DQQAAgAR24cIZtbQ0GVlp6XytXr3OpUYAAGA6hCMx7T1jfs+vXlWkrPRklxoBAAAgETEYBAAASFB377bo3LlTRhYIpOq553ZxriAAALPM0Ut31DcYMbLdleUutQEAAECiYjAIAACQgILBoA4e3CPHcUay++cKpqQEXGwGAACmmu04evtEg5GtXpirssJ0lxoBAAAgUTEYBAAASDCO4+jw4X0aHBww8vXrN6uoaJ5LrQAAwHS5WNehlo5BI9tdOd+lNgAAAEhkDAYBAAASzOXL59XUdNvIiotLtWbNBncKAQCAafVWjblasLQgTasX5rrUBgAAAImMwSAAAEACaWu7q9Ona4wsJSWg559/UR4PP9oBADDb3L7bpyu3uozstcr5nCcMAACAZ8K7RwAAAAkiFArpwIH3jHMFJWnHjhcVCKS61AoAAEynsasFM9P82rqq2KU2AAAASHQMBgEAABKA4zg6cmS/Bgb6jXzt2o0qKSlzqRUAAJhOXX0h1Vy5a2QvbypVko+3cwAAAPBs+EkSAAAgAVy9ekkNDfVGVlhYrPXrN7tTCAAATLt3TzUoZj/YKcDv82jXxlIXGwEAACDRMRgEAACIcx0dbTp16piRJScna+fOlzlXEACAWSoYjmr/mWYj2752njJS/S41AgAAwGzAO0kAAABxLBwO68CB92TbtpE/99yLSk1Nc6kVAACYbofOt2gwFDWy1yrnu9QGAAAAswWDQQAAgDjlOI6OHj2gvr5eI1+9ep3KyspdagUAAKabbTt652SDkW1Ykq/i3FSXGgEAAGC2YDAIAAAQp65fv6pbt+qMLD+/UBs3VrnUCAAAzIQz19vU1h00st1VrBYEAADA5DEYBAAAiENdXR06ceKIkfn9fs4VBABgDnirxlwtuKA4Q8vmZ7tTBgAAALMK7yoBAADEmUgkov3731UsFjPy7dt3KT09w6VWAABgJtQ29ehGU4+R7a6cL8uyXGoEAACA2cTndgEAwMTU1dXpq1/9qo4fP67W1lb5fD6Vl5frtdde06c//WmlpaW5XRHAJB0/fki9veYbgitXrlF5eYU7hQAAwIx564S5WjAnI1lbVhS61AYAAACzDSsGASCBnDx5Uh/5yEf0z//8z0pKStJLL72kyspKNTU16b/9t/+mj33sY+rp6Xn6AwGIW7W111RXd93I8vLytWlTtUuNAADATGnrHtKp91uN7NUt8+Xz8vYNAAAApgYrBgEggfzRH/2RgsGg/uN//I/63Oc+N7KdUHd3tz7zmc/o0qVL+vu//3v99m//tstNATyL7u4uHT9+yMiSkpK0c+cr8nq9LrUCAODpgkMRtbb0KRSMqqgkQ5nZAbcrJaR3TzbKcR5cTvZ7tXP9PPcKAQAAYNZhMAgACaKrq0vXr19XUlKSfvM3f9M4YyQ7O1uf+cxn9Nu//ds6d+6ciy0BPKtoNKoDB95VNBo18m3bdiojI9OlVgAAPCwSjqntTp9aWx7809cTHLn
en+zVhz++QXmF6S62TDyDwYgOnG82sp3rSpSakuRSIwAAAMxGDAYBIEEkJY3vDYGcnJxpbgJgOpw4cUTd3V1GtmzZSlVULHapEQAAUixqq721X233h4B3+tTVPvjE+4RDMdXf6GAwOEH7zzUrFI6NXLYs6dUtZS42AgAAwGzEYBAAnqC/v1+HDx/W8ePHdfnyZdXX16uvr0/JyckqLCzUunXr9HM/93N6/vnnjRV80yE9PV0bN27UmTNn9Ld/+7cPbSX6D//wD5Kkj33sY9PaA8DUu3nzhq5fv2pkOTm52rJlm0uNAABzkW076uoYfDAEbOlTR2u/bNt5+p3HKCjOmIaGs1c0Zuvdk41Gtnl5ofLZkhUAAABTjMEgADzG1772Nf3FX/yFQqHQQ9dFo1HdvHlTN2/e1L/+679qy5Yt+vKXv6ySkpJp7fSnf/qn+vVf/3X9zd/8jX7yk59o+fLlCgaDOnXqlAKBgP6f/+f/0Y4dO6a1A4Cp1dvbo6NHDxqZz+fTzp2vyOfjRzUAwPRwHEe93UG1tvSNDALb7vYpGrGf+TF9Po/yi9K1ZnOp5i9kF4uJOHm1VV195u8du6vmu9QGAAAAsxnvNgHAY9y8eXNkKFhUVKTt27dr9erVysvLUygU0tmzZ/XDH/5Qg4ODOnnypD7xiU/o+9//vvLy8qat0+LFi/Wd73xHv/Vbv6WzZ8+qvr5+5Lrt27dryZIl0/bcAKZeLBbV/v3vKhqNGPnWrc8rKyvbnVIAgFlpoC80shXo/UFgKBh9+h0fw+OxlFuQpsJ5GSqYl6HC4gzlFqTJ45neXTRmI8dx9FZNg5EtKcvS4pIslxoBAABgNmMwCACPYVmWduzYoc985jPatm2bPB6Pcf1HPvIR/ft//+/1v/1v/5tu3rypxsZGfeUrX9GXvvSlhx7r937v93T+/PkJPf+rr76q3/7t3zayY8eO6XOf+5zy8/P193//91q/fr2Ghoa0b98+ffnLX9b+/fv1N3/zN6waBBLEyZPH1dXVYWSLFy/TokVLXWoEAJgNgkMRtd15sB1oW0ufBvrDk3rM7NyACudljgwC8wvT5EvyTlHjue1SXYdu3e0zst2VrBYEAADA9GAwCACP8fnPf17Z2dlPvE1paan+8i//Uj//8z8vSfrpT3+qL3zhCwoEzLNAWlpadPPmzQk9f1tbm3G5u7tbv/Vbv6VwOKyvfvWrKi0tlSRlZmbql3/5l5WRkaHPf/7z+sM//EO9/fbb8np5owaIZ7du1en99y8ZWVZWjqqqnnOpEQAgEUXCMbXf7TdWA/Z0DU3qMdMzk1U4L2N4CFg8/E9yCm8fTJd/PVBnXC7MDmjj0gKX2gAAAGC24yd7AHiMpw0F71uxYoUWLlyomzdvamhoSLdu3dKKFSuM23zzm9+cdJ99+/apu7tb27ZtGxkKjvbaa68pKSlJjY2NamhoUEVFxaSfE8D06Ovr1ZEjB4zM6/XqhRdeVlJSkkutAADxLhaz1dk2MLISsLWlT13tA3KcZ3/MlNSk4SFgccbIasDUNP/UlcYTNbb26cSVu0b2auV8tmQFAADAtGEwCABTID09feTr++cSTrW7d4ffMMjIyHjk9T6fT6mpqerp6VFPT8+0dAAwebFYTAcPvqdIxNzSrarqOWVn57rUCgAQb2zbUXfn4MhWoK0tfepo7Vcs9uxTwCS/VwX3BoD3/0nPTJZlMYRyyw/HrBZMTfbpubXFLrUBAADAXMBgEAAmKRwOq76+fuRySUnJtDxPQcHwdkKXLl1SNBqVz2f+FV5fXz8yEHzUikIA8eHMmRq1t5tbBS9cuERLlix3qREAwG2O46ivJ6TWlt6RQWDb3X5FwrFnfkyv11JeUfrISsDCeRnKzktlCBhHegdCeu/EbSPbtbFUKX7eqgEAAMD04adNAJikH/3oR+rr65MkrV69emSAN9V27typlJQUNTU16Stf+Yp+53d+Z2Q42NnZqT/4gz+QJFVVVSk
/P39aOjxNZmaKK8+L6XN/GyuPx1JubprLbRJfXV2tLl++YGTZ2dl6/fXd8vvn1rZtvLYwHXhdYTpMx+uqvy+kloZuNTf2qKWhRy2NPRocCD/9jo9heSwVFKVrXlmWSuZna15ZlgqLM+T1eaakL6bH99+7pnDUHrns81r6xZeXKTeLn6nx7PheCAAAnobBIABMQmdnp77yla+MXP7N3/zNaXuu/Px8/cEf/IG+8IUv6Gtf+5reeustrVq1SsFgUOfOnVNfX5/y8/P1x3/8x9PW4Wm8Xt58mq0sy5LXywqDyejt7dXbb79tZF6vVx/60IcUCMzdNwB5bWE68LrCdHjW11VwKKLmhh41N3SP/NPbHZxUl9z8NJXMz1JJefbwILA0S0l+76QeEzMrHInpJ4frjez5DaUqyE11pxBmHb4XAgCAx2EwCADPKBwO67Of/aw6OjokSa+88opeffXVaX3Oj33sY1q2bJm+8Y1v6PTp09q/f7+8Xq/Kysr0sY99TL/+67+uvLy8ae3wJLGY/fQbIaF4PJYsy5LjOLLtZz/TaK6zbVs//vGPFQyabwTv3PmC8vLy5+T/d3htYTrwusJ0mMjrKhKJ6W5Tr5obu9XS0KPmhh51tg9M6vkzslLurQTM0ryyLM0ry1YgNemh283F7yWJbO+pBnX3m2eT/y/PL+K/IyaN74WzGx/GBQBMBctxHH5KAIAJsm1bv/d7v6d/+7d/kySVl5frn/7pn5SVleVyM3d1dQ0qGn32s3AQf3Jz0+T1ehSL2ersnNwbm3PZ6dM1unjxrJEtWLBQO3e+MmfPeuK1henA6wrT4XGvq1jMVmfbgNru9Km1ZfifzrYBTeY37OQU38h5gAX3/kxLT56CfwvEg56BsJra+tXUNqB3TjaovefBB4ZWLsjR7/7KRhfbYbbge+Hs5fN5lZPDqmIAwOSxYhAAJshxHP3hH/7hyFCwpKREX/va1+b8UBDAozU1NTw0FExPz9C2bTvn7FAQABKNYzvqaO3X9aut94aAvWpvHVAs+uyru3xJHhUUZ4wMAgvnZSgjK4XvDbPAUCiqprYBNbYPDwGb2vrV1D6gvsHIY++zu2r+DDYEAADAXMZgEAAmwHEc/dEf/ZG+//3vS5KKi4v1jW98Q2VlZS43AxCPBgcHdfjwXiPzeDzaufNl+f2sAAGAeNbbPaRbtZ1qud2jxltdCgWjz/xYHo+lvMJ0YwiYnZcqj4chYCKLRGNq6RhU471VgE3tw0PAjt7Q0+88SllhutYscu84AAAAAMwtDAYBYJwcx9EXv/hFffe735UkFRUV6c0331R5ebnLzQDEI9u2dejQnofOFdy0qUr5+YUutQIAPE4sZquloUe3azt1q65T3R2Dz/xYOfmpxhAwryBdXh/nQiWqmG2rtWtoeBXgvdV/TW0Duts1OKmtYyXJsqRP/9wqeVgpCgAAgBnCYBAAxuH+UPA73/mOJKmwsFBvvvmmFixY4HIzAPHqwoUzunOn2cjKysq1cuValxoBAMbq7wvpdm2nbtd2qPFWtyLhiZ+VnJmdMnwm4L1tQQuKM5Tk905DW0w3x3HU2RtSU3u/Gu9vAdo2oOaOQUVjz75t7Gh+n0cl+WkqLUjT0vJcbV07T8W5qZwFBwAAgBnDYBAAnmLsULCgoEBvvvmmKioq3C0GIG7dudOsc+dOGVlqapqee24XZ0cBgIts29Hdpl7dquvQ7dpOdbRObBiTmuZXwTzzXMCUQNI0tcV06h0MP1gB2DagpnvnAQafYTj8KF6PpeLcVJUWpKk0P01lBekqLUhTfnZgZHVgbm6avF6PYlM0dAQAAADGg8EgADzFH//xHz80FFy4cKHLrQDEq6GhIR08uMfILMvSzp0vKzk5xaVWADB3DQ6E1VDXqdt1nbpd16VwaPxnBaYEkrR0ZaGWrixU6YJsRWIxPuCRYIZCUTW3D5//19jaP3IOYO9gZMqeoyA7RaX5w4O/0oLhIWBxbqp8Xra
PBQAAQPxhMAgAT/Anf/In+va3vy3pwVBw0aJFLrcCEK8cx9GhQ3s1NGSeS7VhQ6UKC4tdagUAc4vjOGq706dbtZ26Xdup1pa+Cd2/cF6Gyhflqnxx7vD5gHnpI6u62O4xfkWitlo6BkbO/7u/ErCjN/j0O49TVppfZQVpKi1IV2n+8J8l+alK8fPWCgAAABIHP70CwGP8xV/8hb71rW9JGl7t88lPflJ1dXWqq6t74v1WrVqlkpKSmagIIM5cvHhOLS2NRlZSUqY1a9a71AgA5oZQMKKGm13Dw8C6TgUnsBrMn+zV/IXDg8DyRblKTfNPY1NMlm07au0eGjn/r/HeCsC7nUOyHWdKniOQ7BtZ+Te8DWiaSvLTlJHKawMAAACJj8EgADzG6dOnR752HEd//ud/Pq77felLX9JHP/rR6aoFIE61tt7R2bMnjCwQSNVzz73ItnMAMMUcx1Fn28DIqsA7TT2ayEworyBteBC4OFfFpVnyePh7Ot44jqOuvpAaR53/19Q2oOaOAUWiU3MmX5LPo5K8tAerAO+dB5iTkcz3bgAAAMxaDAYBAAAmKRQK6sCB9+SMelfasiw9//xLCgQCLjYDgNkjEo6psf7+qsAODfSFx31fX5JHZRU5WrA4V+WL8pSemTyNTTFRfYPh4cHfvdV/jfe2Ax2awHmQT+KxLBXlBlRakK6ye1uAlhWkqSA7wFAYAAAAcw6DQQB4jG9+85tuVwCQIM6cOaHBQfPcqXXrNqm4mG2FAeBZOY6j7s4h3b43CGxu6JEdG/+ywOzcgMoX52rB4jzNK8uS1+eZxrYYj2A4OnIGYNOolYA9A+Mf8j5NflbKyPl/97cDLc5NVRL//QEAAABJDAYBAAAmpb29TdeuXTGy4uISrV270aVGAJC4opGYmht6dOtGh27Xdaq3Ozju+3q9lkoWZGvBojyVL85VVg4rtt0Sjdm60zGoxrb+kUFgY1u/2nvG/9/zaTLT/PcGgA/OAizJT1Mgmbc5AAAAgCfhJ2YAAIBn5DiOamoOG5nP59P27bvk8bAyAQDGo7c7qNt1Hbpd26mmW92KTuD8uIzMZJUvydOCRbkqWZCtpCTvNDbF01xr6NY/7ruh+pY+xewJHPr4BIFkr0rzH5z/d38lYGaqf0oeHwAAAJhrGAwCAAA8oxs33ld7e6uRrV27Uenp6S41AoD4F4vZutPYM3xWYG2nujoGx31fj8dScVnW8FmBi3OVk5cqy+KMuHgQidr6mx9cVO8zbgvq83pUkpc6cv7f8CAwXbmZyfw3BgAAAKYQg0EAAIBnEAoFdfp0jZFlZGRp1ap1LjUCgPg10BfS7bpO3artVGN9lyLh2Ljvm5ruV/miXC1YnKuyihz52SoyLjW29Y9rKGhZUlFOqrEFaGlBmgpzAvKy2h4AAACYdvxGBQAA8AzOnj2pUMg8K6mqaru8XraxAwDbdtTa3DuyKrC9tX/c97UsqagkU+WLc1W+KFf5RemsGEsAfYMPDwXzMpOHt/7MvzcELEjTvLxUJfn4XgkAAAC4hcEgAADABHV0tOvatStGVl5eodLS+S41AgD3DQ2G1VDXpVt1nWqo61QoGB33fVMCPpUvylX54jzNX5ijlEDSNDbFdOgbjBiX5xem64ufqXKpDQAAAIDHYTAIAAAwAY7jqKbmsBzHGcm8Xq+2bNnmYisAmHmO46j9br9u1XbqVm2HWpv7JnT/guL04S1Cl+SpoDhDHg+rAhNFKBJTZ29QHT1Btd/788qtLuM2makMdwEAAIB4xGAQAABgAmprr6mt7a6RrV27UenpGS41AoCZEwpG1VjfpVu1Hbpd16mhgcjT73SPP9mr+QuHtwctX5Sr1HT/NDbFs3IcRwPBqDp6guq4N/TrGDME7B96+n/3jDT++wIAAADxiMEgAADAOIXDIZ0+fdzIMjIytXr1OpcaAcD0chxHne2Dul3boVu1nbrT2KNRC6afKrcgbXhV4OJcFZVmyuv1TF9
ZjIvtOOrpDz8Y+D1i+BcKxyb9PPlZKVPQFgAAAMBUYzAIAAAwTmfPnlQwGDSyysrt8nr5kQrA7BEJx9R4q0u3azt1u65T/b2hcd/Xl+RR2YIclS8eXhWYwXBoxkVj9kPbfI4e/nX2hhSzJzDdfQYZqUnasXbetD4HAAAAgGfDu1gAAADj0NnZofffv2xkZWULVFZW7lIjAJg63Z2Dul3bqVu1nWpu6JYdG//gKCsnoAWLc1W+OFfz5mfL52NV4HQaCkUfWuU3euVfT39Y0zv2eyA9kKS8zBTlZaWM/FmQlaIVC3IUSObtBgAAACAe8ZM6AADAUziOo5qaw3JG7Z/n9XpVWbnNxVYA8OyiUVvNt7tHVgX2dA2N+75er6WS8uyRVYHZuanT2HRucRxHfUOR4WHfI8746+gNaiAYnZEulqTsjOSHBn8P/kxWip+3FAAAAIBEw0/xAAAAT1FXd12trXeMbM2aDcrIyHSpEQA8u6ZbXXr7B1cUHIqM+z7pmclasDhP5YtzVVqerSS/dxobzl4x21Z3X9g4z6+jJzi89ee9r8NRe0a6+LyWcjMfDPryxwz+cjKS5eNMSAAAAGDWYTAIAADwBOFwWKdOHTey9PQMrV693qVGAPDsIuGY3vu3q08dCno8lorLMlW+OE8LFuUqJz9VlmXNUMvEFYnG1NEbGlnd1z5m5V9XX0i2MzMbfQaSvcNDvses+MtM88vDf1MAAABgzmEwCAAA8ATnzp1UMGhusVdVtV0+Hz9GAUg852oaNNAffuR1qWn+ke1ByypylJzC33NjDQYjw8O+R53x1xNU7+D4V2FOVmZq0mO2+ExRflaKUlOSZqyLJNnhmIaONSra0i/fvHSlbiuTlcTKUgAAACDe8JseAADAY3R1derq1UtGVlZWrrKyBS41AoBnN9Af0pnjDUaWlRPQ8jVFKl+cq/yi9Dm7KtC2HfUNhtXdH1ZXf0jdfSF19w//0x+MqbM3qNauQQ3O0Pl+HstSTkayMfDLv/d1bubwuX/+OBq6Obajwf23FG3ukyRFbnYrPC9dyUvzXG4GAAAAYCwGgwAAAI/gOI5qag7LGbXlm8fjVWXldhdbAcCzqzlQr2jEPL/utQ+vUn5RukuNpp/jOBoMRe8N+sLqGjXw67qXdfeH1NMfnrEtPiXJ7/MMn++X9egz/rIz/PJ6Eud8v+DZOyNDwfucyMyclQgAAABgYhgMAgAAPMLNm7W6e7fFyNasWa+MjEyXGgHAs+to7dfV83eMbMW64oQeCoYiseEhX1/o3iq/8MjQr3vU0C8cnfkBVVqK77Fn++VlpSgjkDRrVmeG67sVutBqZFaKT/5FOS41AgAAAPAkDAYBAADGCIfDOnXqmJGlp2dozZoN7hQCgEk6urfOuOxL8qjq+Qp3yjxFNGard2D0lp5hcwDYH1Z3X0iDoZnZ1nMsS1JWuv/BsO8RA8BA8tz4VTvWNaTBw+b2tLKktF0L5OGMSgAAACAu8ZM6AADAGOfPn9bQ0KCRbdmyTT4fPzoBSDy36zrVcLPLyDZUz1daRvKM9rAdR/2DkUdu5flgi8+w+gbCmrlNPU0ey1JWul/Z6X5lpycrOyNZJQXpys8OKC8rIL/lKCcjRUm+xNnmc7rYoagG9tZLY1ZkBqpK5UvglagAAADAbMe7WwAAAKN0d3fpypULRlZaOl/z5y9wqREAPDvbdnRkT62RpaX7taFq/pQ9h+M4GgrFxgz8HqzsG9nesz+smO3WyE9KDyQpOz1ZORnJxuAvJz1Z2RnDlzNT/fJ4zC0+c3PT5PV6FIvZ6uwccKl9fHFsR4MHbsvuCxu5f0mO/MvzXGoFAAAAYDwYDAIAANzjOI5qag7LcR68ce3xeFRZuX3WnAUFYG65er5FXe3mCuiqnQuV5PeO6/7hSEzdA6MGfPdW+T3Y5nP4cigSm47645Li9z488Bu5PJxlpSezym8KBc/cUbS5z8i8+akKbC3j+yUAAAAQ5xgMAgA
A3FNfX6c7d5qNbPXq9crMzHKpEQA8u3AoqpoD9UaWV5impasLFY7ENBCMjlnhF1J3372h373B30DQnXP8JMnntUaGfNn3hn45I18/GALOlfP84kW4vluhi61GZqX4lLZrgSwvw1cAAAAg3vEbFAAAgKRIJKJTp44ZWVpautau3ehSIwBzneM4CkdthSIxhcL3/onEFIzEFA4P//mk62J3BpQ0GDEe82TXgN76yj457u3oKcuSMtOGh3o5jxn65WQkKy3Fx+qzOBPrGtLg4QYztKS0XQvkSfO7UwoAAADAhDAYBAAAkHT+/GkNDppnR23Zsk0+Hz8uAXgy23EUidiPHtSFYwrfG9jdzx553WMGfM86v/NLWiuPpAeDtW45ao/YU/Gv/FhpKb4H5/bdO7svx1j1l6zMtCR5PawsSzR2KKqBPfVS1HwNBapK5StKd6cUAAAAgAnjnS4AADDn9fR06/Ll80Y2b16Zyssr3CkEYFrYjqPQowZ1975+5BAvYisUjo65zlYoEr13XczV8/Uep1SWPKOGgo4cNejZh4L+JI9yjHP7Hqz0u/91TrpfSb7xnV2IxOLYjgYP3JbdHzZy/5Jc+ZfnudQKAAAAwLNgMAgAAOY0x3FUU3NYzqh99Twej6qrt7OFHTAJjuPIdhxFY45iMVtR21Es5igasxWN2cNf2/aDzL53u5ijmD3mdiP5g/tF7+WxUffzej0KR20Fw1H1D4QfGv6Fp3m1XLxIlZQvc0VemxwFH3Fbr8cyhnv3z+4bPQDMyUhWit/L34lzWPDMHUWb+4zMm5+qwNZSXhcAAABAgmEwCAAA5rTbt2+qpaXJyFatWqfMzGx3CgGP4cagbfTtRx7jEY87+vb3e8Vi9jNvg4nHS07yKtnvVXKSR8lJPiX7PUpJ8irZ71Nykkf+JK/CtV2K9j1Y2eXxebT7taVKz0gevn+SVyl+r1KSfUoPJMnDYAdPEK7vVuhiq5FZKT6l7Vogy8uWsAAAAECiYTAIAADmrEgkohMnjhpZamqa1qzZoGhseGXR8DBGkjO8DaGR6V7mDGeOhk/zsixLsiSPZenel7JGvr73p/WojDfnJ8txhodVMduRbT/4OnZvsGXbjqIj190fgo2+/cNZ1LbHPNbw7Yzs/v1jox5j1PPFYo+5/b3BXMwZvq/R4f7jOQzaEo0lye/33hvYeUcN8x6d3f/6kdeNul9SkuepQ7yb19v1s7N3jazyuQXatK5kGv+NMVvFuoY0eLjBDC0pbdcCedL87pQCAAAAMCkMBgEAwGMdu9ii0++3DZ+lFYyODMaGB2WOHGNY9mA4NnZYNnLdozKNvm5M9tj7jrq9HvO49/6833f0fYeHfFJ5WpvK0weMf+cTjRn6yX89NBP/8z6SMUR8zPBw+Dajb2cOID3W8CNZ94aTskbnDwaW0nD2YJA5+vEfMdS0xnYYz21Gdbj3fLKklJQkOY4UjdoaDEbuDdXsMQOyh4ds9wd293Nj0BdzRl6PwHhYlpTi98r/mIFdSpL3iQO+x13n93lcGfTHYraO7q0zsvTMZK3bUjrjXZD47FBUA3vqpai5BW+gqlS+onR3SgEAAACYNAaDAADgkY5dvqP//sPLbteYNgFvWGVpXUbWFQqoLZjmUqNho4eXDxJgdvJ5PfJ6Lfk8lnxej3xeS16vZzj3WA8u37vee/9P76jbex5czkhLlt/vVbLPo1g09ugh3v3VeX6vfF53BnjT5fLZFvV0DhlZ9QsL5UvyutQIicqxHQ0euCW7P2zk/iW58i/Pc6kVAAAAgKnAYBAAADzEcRz92+F6t2tMI0dLs9pHVtBJwysIr/cUaHj9G5CY7g/LHhqs3RuueccM2u7fzhi0eT3yjQzbrFFfP2ow9+gB3sOP+2AA6L33PMMrV6f2/2+5uWnyej2KxWx1dg48/Q6zSCgY1clD9UZWUJyhpasK3SmEhBY806Joc7+RefNTFdhaOquG6QAAAMBcxGAQAAA85P3
b3WrpGHS7xrTJTxlQbor579c4kK3BKOclzRWWpeGVZh5LXo8lz/2hmef+P4+6bkx2b8g1OvONXOcxHs8z9nG9DzKfxzPq+tHXPXjs+4O1+18/GLQ9GOBNx6ANieP00VsKDkWNbPvLi3hNYMLC9d0KXWwzMivFp7RdFbK8nifet6u1Ue1NdSooW6LsAs61BAAAAOIRg0EAAPCQvWeajMsF2QFVrih88hl3Y7OnnYk3OtMjztXTJG4/9r6j+sTsqM4c/alCwQf/fv7kgH71lVeVlJQkPeosPsuSR7p3Dt+D4Yvn3nUj5+jdY98741COM3K+4f1cjmRr9FmH985D1JizFI3zEx91vuKTz2wcPgdy1Ncyz4R80HF0hyef1/hQTz2iy6jbPPS89/5dbMdRSkqSvF6PPJYUDkVHBmLesUOyUYMyY4DmteS1hgdzD9/eM2ZI9/BAbvR/LyDR9XYP6fxJ8+/thcvyVTI/251CSFixriENHm4wQ0tK27VAnrSkJ9636cZ5HfnR12TbMXm8Xr387z6v3KLyaWwLAAAA4FkwGAQAAIaegbBOXzNXCnzo+UXasbrIpUZT68yZEwoFzdWC1VXbVFGS41KjuWkub/kITLXj+2/Kjj04j9TjsbR110IXGyER2aGoBvbUS1HbyANVpfIVpT/xvgO9nTr+1v+UbceGHysWU0vdJQaDAAAAQBx68j4gAABgzjl4rlkx+8EbzP4kr17aXOZio6nT29ujS5fOGVlxcYkqKha71AgAJudOU69uXDE/zLF6U4myc1NdaoRE5NiOBg/ckt0fNnL/0lz5l+c98b62HdOxn7ypSGjIyPNLF015TwAAAACTx2AQAACMsG1H+882G9nODaVKT038s/ccx9GJE0dk2w9WQliWpaqq7ZzBBSAhOY6jI3tqjcyf7NOW5xa41AiJKnimRdHmfiPz5qcqUF361O+Rl47+TO3NdUZWvnyTCucvm/KeAAAAACaPwSAAABhxoa5DHb1BI3t9e4U7ZaZYY+MtNTWZ5yatXLlG2dm5LjUCgMmpe79dd5t6jWzz9nKlBJ58FhwwWri+W6GL5qpTK8WntF0VsrxPfsvg7u1runz8bSNLy8rT5ld+mQ/dAAAAAHGKwSAAABix70yTcXlRaZaWzs92p8wUikajOnHiqJEFAgGtW7fZpUYAMDmxqK1j+8xVWpnZKVq7udSlRkhEsc4hDR42PzQjS0rbtUCetCcPmIODfTr20zclPdh+3PJ4tO2Dn5I/OTANbQEAAABMBQaDAABAktTeM6TztR1G9oGtC2bFJ/4vXjyr/v4+I9u8eav8/sTfIhXA3HTxdJN6u80V3lt3LZLXx694GB87FNXA3nopaht5oKpUvqL0J97XcWzVvPU/FRwwV6yue+5DypvHVrYAAABAPOO3RgAAIEk6cK551Gf+pUCyV89vSPyVJ319vbp48ZyRFRXN08KFS1xqBACTExyK6OTh20ZWXJqpRcvzXWqEROPYjgYP3JLdHzZy/9Jc+ZfnPfX+107vV8vNy0ZWvGCFlm95cUp7AgAAAJh6DAYBAICiMVsHzrUY2bbVxQok+1xqNHVOnDgi246NXLYsS1VVz82KlZAA5qaTh28pHIoa2baXFvP3GsYteKZF0eZ+I/PmpypQXfrU11Hn3ds6f/CHRpaSmqHqD/yqLIu3GAAAAIB4x0/tAABAZ663q3fAXDWwa2PirxZsbLylxkZzVc2KFauVk5PrUiMAmJzuzkFdOt1sZEtWFqi4NNOlRkg04fpuhS62GZkV8CltV4Us75PfIoiEgzr6428YH7iRpOrXP6GUNF6DAAAAQCJgMAgAALT3dKNxeWlZlsoKnny+ULyLxaKqqTliZCkpAa1fv8WlRgAwecf23ZRtP9j42eO1VP3CQhcbIZHEOoc0eLjBDD2W0nZVyJOW9NT7n3rv++rvNoeKKypfUfGCFVNZEwAAAMA0YjAIAMAc19IxoKu3u43sxVmwWvDixXP
q7+8zss2bq+X3+11qBACT03y7WzevtRvZui2lyswOuNQIicQORTWwt16K2kYeqCqRrzDtqfe/eblGt66cNLLc4gVau/2NqawJAAAAYJoxGAQAYI7bd8bcki49kKTNywtdajM1+vp6dfHiWSMrKCjSokVL3SkEAJPkOI6O7KkzspSAT5u2LXCpERKJYzsaPHBLdr+5bbh/aa78y/Keev++rladfu/7RpbkT9G2Nz4lj9c7pV0BAAAATC8GgwAAzGHhSEyHL7QY2fPr5inJl9g/Ipw8eVSx2IPzjyzLUnX1DlmW5WIrAHh21y+3qu2OuQp6y44KJaf4XGqERBI806Joc7+RefNTFagufer3xlg0oiM/+pqiEXOoWPnaryg96+lDRQAAAADxJbHf9QMAAJNSc6VVg6Gokb2wocSlNlOjqem2GhpuGdmyZauUm8ublwASUzQS0/H9N40sKzegVRvmudQIiSR8s0uhi+a5gFbAp7RdFbK8T39L4NzBH6q7rcnIFq3drvnLNk5pTwAAAAAzg8EgAABz2N4z5ht9axbmqjAn1aU2kxeLxVRTc8TIUlJStHHjFpcaAcDknT/ZpP7ekJFt27VI3nEMdTC3xTqHNHik0Qw9ltJ2VciTlvTU+zfVXtD1M/uNLDOvWBt3fXQqawIAAACYQfwmCQDAHHXrTp9utvQa2a6NpS61mRqXLp1TX5/577RpU7X8/mSXGgHA5AwOhHX66G0jK5mfpYqlrILGk9mhqAb21ktR28gDVSXyFaY99f6DfV2qeevbRub1JmnbBz8lX5J/KqsCAAAAmEEMBgEAmKPGrhbMyUjW+iWJ+0Zzf3+fLlw4Y2T5+YVavHiZS40AYPJOHrqlSDhmZNtfXsyZqXgix3Y0eOCW7H7zXED/0lz5lz39e71t2zr2028qHBww8g27PqLsgsTechwAAACY6xgMAgAwBw0Gozp++a6R7VxfIq8ncX80OHnyqGIx883z6uodvHkOIGF1tQ/o8tlmI1u2ukgFxRkuNUKiCJ5pUbS538i8BakKVJeO6/vileNvqa3xhpGVLV2vxeuem9KeAAAAAGZe4r77BwAAntnRS3cUijwYonksSzvXJ+4KgObmRt2+XW9ky5atUl5evjuFAGAKHN1bJ8d5cNnr86j6hQrX+iAxhG92KXSxzcisgE9puypkjeNcyrbGWl069jMjS83IUeWrv8KHbQAAAIBZgMEgAABzjOM42nfW3EZ0w9J85WQk5jl8sVhMNTWHjSw5OVkbN25xqREATF5jfZdu1XYa2fqqMqVnprjUCIkg1jmkwSONZuixlLarQp7UpKfePzQ0oKM/+YacURNpy/Jo2xu/Jn9K6lTXBQAAAOACBoMAAMwx1xt71NRmnhn04sZSl9pM3uXLF9Tb22NkmzZVKzmZN88BJCbbdnRkT62RBdKStLF6vkuNkAjsYFQDe+ulqG3kgaoS+QrTnnp/x3F04u1va6i/28jXbP+g8ksWTWFTAAAAAG5iMAgAwBwzdrVgYXZAKytyXGozOQMD/bpw4bSR5ecXaMmS5S41AoDJu3bxrjpazQ9wVD1fIX+yz6VGiHeO7WjwwC3Z/WEj9y/NlX9Z3rge48bZg2qqvWBkhfOXaUXlK1PWEwAAAID7GAwCADCH9A6GdfJqq5Ht2lgqT4KeGXTy5DFFo1Ejq6rawRlIABJWJBzT8QM3jSwnP1Ur1s1zqRESQfBMi6It/UbmLUhVoLp0XN8Tu9qadPbAD4wsOZCmra9/Qh4PbxsAAAAAswk/4QMAMIccvtCiaOzBuUE+r0fPrS12sdGza25u1K1bdUa2dOkK5ecXuNQIACbvbE2DBses+tr+0mJ5PHzgAY8Wvtml0MU2I7MCPqXtqpDlffqv/NFISEd/9DXZsTEftNn9qwqkZ01pVwAAAADuYzAIAMAcYTuO9p9pNrLKFQXKSPW71OjZxWIxnThxxMj8/mRt3FjlUiMAmLyBvpDOHm8wsrKKHM1
fmJjbPWP6xTqHNHik0Qw9ltJ2VciTmjSuxzi955/U12XuJrBs84sqWbR6qmoCAAAAiCMMBgEAmCMu13eqtXvIyF7cWOZSm8m5cuWienq6jWzTpkqlpKS4UwgApkDNwXpFI7aRbX9pEdsj45HsYFQDe+ulqPmaCVSVyleYNq7HuHX1lG5eOm5kOUXztW7Hh6aqJgAAAIA4w2AQAIA5Yu/pJuNyWUGaFpdmutTm2Q0ODuj8+VNGlpeXryVLVrjUCAAmr6O1X1fP3zGyFeuKlVeY7lIjxDPHdjR44JbsMdvO+pfmKnl53rgeo7+7TSff/a6R+ZKSte2Dn5LX65uyrgAAAADiC4NBAADmgM7eoM7d6DCyFzeWJuQqlJMnjykaHXMOUtUOeTz8WAMgMTmOoyN7zDNTfUkeVe2scKcQ4l7wdIuiLf1G5i1IVaC6dFz3j8WiOvLjbygaDhn55ld+SRk5nNULAAAAzGa8gwYAwBxw4FyzbMcZuZyc5NXW1cUuNno2d+40q76+1siWLFmugoJClxoBwOTdrutUY32XkW2snq+09GSXGiGehW92KXSpzcisgE9puypkecf3K/6FQz9S193bRlaxuloVKyunrCcAAACA+MRgEACAWS5m2zpwrtnItq0uUiA5sbYJs21bx48fNjK/369Nm6pcagQAk2fbjo7uNVcLpqX7tb5qvkuNEM9inUMaPNxghh5Labsq5ElNGtdjtNy8rPdP7TGyjJxCbXrxF6eqJgAAAIA4xmAQAIBZ7uz1DnWPOYNo18bxbTUWT65evaieHnNFzYYNlUpJCbjUCAAm78q5FnW1DxpZ1c6FSvJ7XWqEeGUHoxrYWy/FHCMPVJXKV5g2rscY6u/R8Z99y8g8Xq+2vfEpJflZoQoAAADMBQwGAQCY5fadbTIuLy7JVHlRhkttns3g4KDOnTtlZLm5eVq2bKVLjQBg8sKhqE4crDey/MJ0LV9b5E4hxC3HdjR44JbsMR/08S/NVfLyvPE9hmPr+M++qdCQeTbh+p0fVk5h2ZR1BQAAABDfGAwCADCL3e0a1KWbnUaWiKsFT506pkgkYmTV1Tvk8fCjDIDEdeZYg4YGzb/btr20SJZludQI8Sp4ukXRFnOg5y1IVaB6/N/Tr5x4V3dvXzOyksVrtHTDzinpCAAAACAx8G4aAACz2P6z5tmCaSk+Va4odKnNs7l7t0U3b94wssWLl6mggBU1ABJXf29Q5040GtmCxbkqq8hxqRHiVfhml0KX2ozMCviUtqtClnd8v9K3N9/UxcM/MbJAepaqXvs4g2gAAABgjmEwCADALBWJxnTofIuRPbd2nvxJiXNulW3bOn78sJElJfm1aVO1S40AYGoc339Tsag9ctmypG0vLnKxEeJRrHNIg4cbzNBjKW1XhTypSeN6jHBwUEd/8g05zujXm6WtH/w1JQfGdzYhAAAAgNmDwSAAALPUyffb1D9kblH3woYSl9o8m/ffv6TubnMr1A0btigQCLjUCAAmr7WlT9cutRrZqg0lyslnSIMH7GBUA3vrpZhj5IHqUvkKx/dacRxHJ975rgZ7ze+lq6p3q7BsyVRVBQAAAJBAGAwCADBL7T3TZFxeuSBH8/IS503noaFBnT170shycnK1fPkqlxoBwOQ5jqOje2qNLMnv1ZYdC1xqhHjk2I4GD9yS3R82cv/SXCUvyxv349RdOKLG62eNrKB0sVZt3T0VNQEAAAAkIAaDAADMQo2t/brR2GNkL24sdanNszl16rgiEXPFY1XVDnk8/PgCIHHVX+9Qc4P59/OmbeVKTfO71AjxKHi6RdGWfiPzFqQqUD3+7+Xd7c06s/efjcyfkqqtH/ykPJ7E2VYcAAAAwNTinTUAAGahvWfN1YJZaX5tWJrvUpuJa229o7q660a2aNFSFRUVu9QIACYvFrN1dF+dkaVnJmtdZZlLjRCPwje7FLrUZmRWwKe0XRWyvOP7FT4aCevoj7+hWGzMB2xe+1+
VmpEzZV0BAAAAJB4GgwAAzDLBcFRHL94xsufXl8g3zjcT3Wbbto4fP2xkSUlJ2ry52qVGADA1Lp9pUU/nkJFVv7BQPl9i/P2M6RfrHNLg4QYz9FhK21UhT2rSuB/n7P5/UW9Hi5Et3bBTpUvWTUVNAAAAAAmM30ABAJhljl2+q2A4NnLZsqQX1pe42Ghirl27rK6uDiNbv36LAoFUlxoBwOSFglGdPFxvZIXzMrR0VaE7hRB37GBUA3vrpZhj5IHqUvkKx39GcMO1M6o9b37AJrugVOt3/vxU1AQAAACQ4BgMAgAwiziOo31nzG1E1y/OV15WikuNJmZoaEhnzpw0suzsHK1YsdqlRgAwNU4duaXgUNTItr+0WJZludQI8cSxHQ0euCW7P2zk/mW5Sl6WN+7HGejp0Il3vmtkviS/tr3xKXl9419xCAAAAGD2YjAIAMAsUtfSq9t3+41s18bEWS14+vRxRSLmm6LV1Tvk8fAjC4DE1ds9pAunzA9tLFyWr3nzs1xqhHgTPN2iaIv5/dtbkKpAVem4H8OOxXT0J99QJGRuV7vppY8pM7doSnoCAAAASHy8ywYAwCwydrVgflaK1iwc/0oDN7W13VVt7TUjW7hwiYqK5rnUCACmxrF9N2WP2h7S47G07cVFLjZCPAnXdSl0qc3IrIBPabsqZE3gfOCLR3+ijpZ6IytfsVkVq6qmoiYAAACAWYLBIAAAs0T/UEQ1V1qN7IUNJfJ44n+bOtu2dfy4eR6Sz5ekzZurXWoEAFPjTlOPaq+aQ581m0qUlRNwqRHiSbRzSINHGszQYyltV4U8qePf+vPOrfd1peZdI0vPyteWl3+J7WoBAAAAGBgMAgAwSxy50KJI1B657PVY2rEuMbYRvX79qjo7241s/frNSk1Nc6kRAEye4zg68l6tkfmTfdr83AKXGiGe2MGoBvfWS6NWk0pSoLpUvsLxf/8LDvbp+M++KWn0qlSvtr3xKSUlM4AGAAAAYGIwCADALOA4jvaebTayzcsLlJXmd6nR+AWDQZ05c8LIsrJytHLlGpcaAcDUqL3aprvNfUa25blypQTGvxIMs5NjOxo8cEt2v3murn9ZrpKXjX8LcMexdfxn31JwoNfI1z3/IeUWl09JVwAAAACzC4NBAABmgau3unS3c9DIXtxY6lKbiTl9ukbhcMjIqqufk8fDjykAElcsauvYvptGlpmdojWbEuPvZkyv4OkWRVv6jcxbkKpA1cReH++f2qc79VeMbF7FKi3btGuyFQEAAADMUrzjBgDALDB2teC8vFQtm5/tTpkJaG9v1Y0bV42somKRiosTYwtUAHicC6ea1NcTNLKtuxbJ6+NXsLkuXNel0CXz3Ekr4FPargpZ3vG/Pjru3NL5Qz80spS0TFV94OOyLF5nAAAAAB6N3xYAAEhw3f0hnblmvsG4a2OpLMtyqdH42Lat48cPG5nP59PmzVtdagQAUyM4FNGpI7eNrLgsU4uW57vUCPEi2jmkwSMNZuixlLarQp7U8W8xGwkN6eiPvy7Htkellra+/gmlpGZMTVkAAAAAsxKDQQAAEtzB8y2K2c7IZb/Po+fWFLvYaHxu3HhfHR3mQHPduk1KS0t3qREATI2Th24pHIoa2faXFsf9BzYwvexgVIN766WYY+SB6lL5CtPG/TiO4+jku9/XQE+Hka+selVF5cunoioAAACAWYzBIAAACcy2HR0422RkVauKlJoy/lUHbggGgzp9usbIsrKytXLlWpcaAcDU6O4c1KUz5vbOS1YWqKgk06VGiAeO7WjwwC3Z/WEj9y/LU/KyvAk91s1Lx3X7/VNGljevQmu2vT7pngAAAABmPwaDAAAksPN1HeroDRnZixtLXWozfmfPnlA4bPaurNwur9frUiMAmBrH9tbJHrWK2+u1VP3CIhcbIR4ET7Uo2tJvZN6CVAWqJnambm/nXZ3e809GlpQc0LYP/po8fA8FAAAAMA4MBgEASGD7zpi
rBRcUZ2jhvPheldLe3qZr164Y2YIFC1VSUuZSIwCYGs23u3Xzurm949otZcrMTnGpEeJBuK5Locvm1tlWwKe0XRWyvOP/lTwWjejIj76mWNRcdVj56q8oLWtiqw4BAAAAzF0MBgEASFDt3UO6UGu+AR3vqwUdx1FNzWEj8/l82rJlm0uNAGBqOI6jI3tqjSwlkKRN28pdaoR4EO0c0uCRBjP0WErbVSFP6sS2/T574F/V025uU7t43XOav2zDJFsCAAAAmEsYDAIAkKD2n2uWM+pyINmn6pVFrvUZjxs33ld7e6uRrV27UWlp6S41AoCpcf1Sq9rumFtFVu5YoOQUn0uN4DY7GNXg3nop5hh5oLpUvsK0CT1W043zunH2gJFl5c3Thhc+MtmaAAAAAOYYBoMAACSgaMzWwXPmqoHta4qV7I/f84VCoaBOn64xsoyMLK1atc6lRgAwNaKRmI4fuGlk2bkBrdwwz6VGcJtjOxo8cEt2v7ntp39ZnpKXTWzbz8G+LtW89W0j8/qStO2NT8mX5J90VwAAAABzC4NBAAAS0OlrbeodjBjZrg0lLrUZn7NnTyoUChpZVdV2eb3xO8wEgPE4f7JJ/b0hI9v64iJ5J3B+HGaX4KkWRVvMFaTeglQFqib2vdq2Yzr2kzcVDg0a+cZdv6CsfAbPAAAAACaO31QBAEhA+840GZeXzc9WaUH8bsfZ0dGua9euGFl5eYVKS+e71AgApsbgQFinj942spLyLFUsmdiqMMwe4bouhS63GZkV8CltV4WsCQ6LLx97S21N5tmV85dt0KK1nM0LAAAA4NkwGAQAIME0tw/o6u1uI9u1MX5XCzqOo5qaw3KcB2cseb1ebdnCm5oAEt+JQ/WKhGNGtv2lxbIsy6VGcFO0c0iDRxrM0GMp7cUKeVKTJvRYrQ3Xdfn4W0aWlpmrLa/8O15fAAAAAJ4Zg0EAABLMvrPmasGM1CRtXlboUpunq629pra2u0a2du1GpadnuNQIAKZGZ/uArpxtMbJla4pUUMzfb3ORHYxqYM9NKeYYeaC6VL6CtAk9VmioX8d++qbxoRrL8mjrB39N/pTUKekLAAAAYG5iMAgAQAIJRWI6cuGOke1YN09Jvvj8lh4Oh3T69HEjy8jI1OrV61xqBABT5+jeOo2a28jn86h650L3CsE1ju1o8MAtOQPm+b/+ZXlKXjaxbWUdx1HNW9/WUH+Pka997g3ll/D6AgAAADA58fkuIgAAeKSaK3c1GIqOXLYkvbCh1L1CT3H27EkFg0Ejq6zcLq/X51IjAJgajfVdul3baWTrq8qUnpnsUiO4KXiqRdGWfiPzFqQqUDXxrb6vnzmg5rqLRlZUvlwrKl+eVEcAAAAAkBgMAgCQUPadMbcRXb0oV4XZAZfaPFlnZ4fef/+ykZWVLVBZWblLjQBgati2oyPv1RpZIC1JG6rnu9QIbgrXdSl0uc3IrIBPabsqZHkn9it3V2uDzh38gZElp2ao+vVPyLL49R0AAADA5PGbBQAACaL+Tq9utvQZ2Ysb43O1oOM4qqk5bJyN5PV6VVm5zcVWADA13r9wRx1tA0ZW9fxC+ZNZDT3XRDuHNHikwQw9ltJerJAnNWlCjxUJh3TkR1+XHYsZefUHflWBtMzJVgUAAAAASQwGAQBIGGNXC+ZkJGvd4omdWzRT6uquq7XVPAtxzZoNysjgjU0AiS0SjqnmYL2R5RakacW6YncKwTV2MKqBPTelmGPkgepS+QrSJvx4p/f8o/q7zZWHK7a8rHkVKyfVEwAAAABGYzAIAEACGAxGdezyXSN7YUOJvJ74+1YeDod16tRxI0tPz9Dq1etdagQAU+fs8QYN9oeNbNuLi+TxWC41ghsc29Hg/ltyBiJG7l+Wp+RlE//QTv3lE6q/XGNkuUXlWvPcG5PqCQAAAABjxd+7iQAA4CFHL91ROGKPXPZYlp5fV+Jio8c7d+6kgsEhI6us3C6fjy32ACS2gb6Qzta
Y20bOX5ij8kW5LjWCW4KnWhS9029k3sJUBaom/r25r6tVp977vpEl+VO07Y1PyevleycAAACAqcVgEACAOOc4zkPbiG5clq+cjGSXGj1eV1enrl69ZGSlpeWaP3+BS40AYOrUHKxXdNSHNCxreLUg5pZwXZdCl80tP62AT2m7KmR5J/Yrdiwa0dEff13RSMjIt7zyy0rPzp90VwAAAAAYi8EgAABx7npjj5raB4xs18ZSl9o8nuM4qqk5LMd5cNaSx+NVVdV2F1sBwNRov9uvq+fNs1NXrCtWXmG6S43ghmjHoAaPmKtG5bGU9mKFPIGkCT/e+UP/pq7WRiNbuHqryldsnkxNAAAAAHgsBoMAAMS5sasFi3ICWrkgx6U2j3fzZq3u3m0xsjVr1isjI9OlRgAwNRzH0dG9tUbmS/Ko6vmFLjWCG2JDEQ3srZdijpEHqkvlK0ib8OM1113StdP7jCwjt0ibXvqFSbQEAAAAgCdjMAgAQBzrHQzr5PutRvbChlJ5LMulRo8WDod16tQxI0tPz9CaNRvcKQQAU+h2Xaca67uNbOPWcqWm+90phBnn2I7u/vianIGIkfuX5Sl5Wd6EH2+ov0c1b33LyDxen7a/8Sn5kuJvq3AAAAAAsweDQQAA4tjh8y2KjlqZ4PN6tGPdPBcbPdr586c1NDRoZFu2bJPP53OpEQBMDdt2dHRPnZGlZfi1vqrMpUZwQ/v+mwo29BiZtzBVgaqSCT+Wbds69tM3FRoytwnf8MJHlF0Qf1uFAwAAAJhdGAwCABCnbMfRvrPmNqKVKwqV/gxnGE2n7u4uXblywchKS+dr/vwFLjUCgKlz5VyLujrMDz5U7VyopCSvS40w0/qvtqn7VLORWQGf0nZVyPJO/FfqqyfeUWvDdSMrXbJOS9bvmFRPAAAAABgPPsYPAECcunyzU23dQSN7cVN8rSRwHEc1NYflOA9WNXo8HlVWbpcVZ9udAsBEhUNR1RysN7L8onQtX1PkTiFMO8d2ZPcEFe0YUqxjULGOIcU6hswbeSylvVghzzN8UKetqU4Xj/zUyFIzclT56q/wfRMAAADAjGAwCABAnNp7xlwtOL8wXYtLMl1q82j19XW6c8dcRbF69XplZma51AgAps7pY7cVHDTPlNv+0iIGOLOEYzuye0OKdQwp2jE4PAjsDEpR+4n3C2wtla8gbcLPFw4O6thPviHHefD4lmVp6wc/qeTAxB8PAAAAAJ4Fg0EAAOJQZ29QZ2+0G9mujaVx9WZ0JBLRqVPHjCwtLV1r1250qREATJ2+nqDOnzA/oLFgSZ5KF+S41AiT4TiO7N7wyCrA6L0/nzYEHMu/PE/JS/Oe6flPvP0dDfZ1Gfnqba+roHTxhB8PAAAAAJ4Vg0EAAOLQgXPNGrU7p5L9Xm1dFV9b150/f1qDgwNGtmXLNvl8/HgBIPEdP3BTsejolV3SthcXudgI4+U4juz+8PA2oO0PBoGKTGwIOFb6inx5t8x7pvvWnj+kxhvnjKywbIlWVr02qU4AAAAAMFG8cwcAQJyJxmztP2duz7ltdbECyfHzbbunp1uXL583snnzylReXuFOIQCYQq0tfbp+qdXIVm8sUU5eqkuN8DiO48gZiAyvAGx/cC6gE45N6nGttCT58lLlzQ8ouyJXqfMyJL9XnZ0DT7/zGN1tzTqz71+MzJ+SpurXPymPxzOpngAAAAAwUfHzDiMAAJAknbvRrp7+sJHt2lDiUpuHOY6jmprDckYtafR4PKqu3h5XW50CwLNwHEdH9tQamT/Zqy07FrjUCPc5jiNnMKLoqJWAsY5BOaFJDgFTk+TNC4wMAr15qfKkPPhVOTU3TV6vR7HYxFccRiMhHf3x12THokZe/YGPKzUje1K9AQAAAOBZMBgEACDO7D1jnmm1uDRT5UUZLrV52O3bN9XSYnZctWqdMjOz3SkEAFOo/nqHWhp6jGzTtnIFUv0uNZq77MGIYh2DxiDQCUaffscnsFJ
88uanypc3PAD05gXkSU2aosYPO7P3n9XbedfIlm3apZJFa6btOQEAAADgSRgMAgAQR+52DupyfZeRvbix1KU2D4tEIjpx4qiRpaamae3ajS41AoCpE4vZOrq3zsgyMpO1dkuZS43mDnsoMrIC8P4g0Bma5BAw2StvfuqD1YB5qbJSfTO2uv32+6dVd9H8npldWKZ1Oz40I88PAAAAAI/CYBAAgDiy76y5Ei8txafKFYUutXnYhQtnNDhonq+0Zcs2JSVN32oLAJgpl840q6dryMiqdy2Sz8c5cFPJDkZHzgKMdQwp2jEoZyAyqce0kr3yjloF6MtLlZWW5NoW1/09HTr5zneNzJfk1/Y3PiWvj++ZAAAAANzDYBAAgDgRicZ06HyLke1YN09JPq9LjUy9vd26fPm8kRUXl2rBgoUuNQKAqRMKRnTy0C0jK5yXoSUrC1xqNDvYoejIAPD+MNAec47uhCV55Lu3EnBkO9B0f9ycc2vHYjr6468rEg4a+eaXf0kZOfHzYR8AAAAAcxODQQAA4sSJq60aGHN20q4N8bGNqOM4qqk5Itu2RzLLslRVtT1u3ogFgMk4deS2QmP+Dt7+8mL+jpsAJxwb3gZ0ZDXgoOy+yQ8Bvbn3tgLNHx4EejLiZwj4KBeO/Fidd8wh84KVlapYVeVSIwAAAAB4gMEgAABxYt+ZZuPyqoocFeWmutTGVFtbq+bmRiNbtWqtsrNzXGoEAFOnt3tIF06ZWzkvWp6veWVZLjWKf04kdm8b0FErAXtDk3tQ3/0hYGDkbEBPZnJcDwHHulN/RVdPvGtk6dkF2vzyx1xqBAAAAAAmBoMAAMSBhtZ+3WjqMbJ4WS0YiUS0f/8+IwsEUrVu3SZ3CgHAFDu276bsmDNy2eOxtHXXIhcbxRcnaivWOTwAjLbfWwnYM8khoNeSN3d4BaDv/krAzGRZnsQZAo4VHOjV8Z99y8g8Hq+2vfEpJflTXGoFAAAAACYGgwAAxIF9Z8yVKlnpfm1Ymu9SG1NNTY36+vqMbMuWrUpK8rvUCACmzp3GHtVebTOyNZtLlJUTcKmRu5yorVjXvTMB2wcV7RiS3ROUnKff97E894eADwaBnqyUhB4CjuU4to7/7FsKDprfL9ft/F+UWzTfpVYAAAAA8DAGgwAAuGwoFNWRS3eMbOe6Evm8HpcaPdDd3a0TJ04YWXFxiSoqFrvUCACmjuM4OrKn1siSU3zavH2BS41mlhOzFesKjmwFGm0flN09BUPAnBR584a3AvXmp8qbPbuGgI9y9eQe3bl11chKFq3Wso27XOkDAAAAAI/DYBAAAJcdv3xXoXBs5LJlSS9sKHGx0TDHcbRv3z7FYqO7Waqq2p5Q5z0BwOPUXm3T3WZzhdfm5xYoJZDkUqOp49iOnKGI7MGo7MGInMGI7KHIg68HI7L7wpI9iSmgpVFDwFR58wPDQ8A4+GDLTOpoqdeFwz8yskBalqp2f5zvlwAAAADiDoNBAABc5DiO9o7ZRnT94nzlZrp7FpHjOLp06Zzq628a+cqVa5SdnetSKwCYOrGorWP7zL/jMrNTtGaT+x/MeBLHceSEYw+Ge4MROfeGf/Zg5N4wMCJnKDq1T2xJnuwU+e5tB+rNS5U3d+4NAccKh4Z09Mdfl2Pbo1JL1a9/QsmBdNd6AQAAAMDjMBgEAMBFdc29amjtN7IXN5W61GZYLBbV0aMHVVd33cgDgYDWrdvsUisAmFoXTjWprydoZFt3LZLXxUGXE7WN4d5DQ797q/4Um8xen+PjyUqWNz/1wSAwNyDLN7eHgGM5jqOT735PA72dRr6q+jUVlS9zqRUAAAAAPBmDQQAAXDR2tWB+VopWL3RvRd7Q0KD27n1b7e2tD123Zcs2+f1+F1oBwNQaGozo1JFbRlZclqlFy/On5fkc25ETjJrbeA6N+nowKmcwImfUttIzyZOZPLwNaG6qfPmB4SFgkteVLonk5sVjanj/tJHlly7S6m0fcKk
RAAAAADwdg0EAAFzSPxRRzRVzALdrY6k8Lp1H1NHRrr1739Lg4MBD1z333HNauHCJC60AYOqdOnxL4ZA5hNv+0uIJnwfnOI4Usc0VfcbqvujICkBN/yK/R/NYsgI+eVKT5ElNknXvT09qkjxpSfLmBGT5GQJOVE9Hi07v/Scj8yenauvrn5THw/+eAAAAAOIXg0EAAFxy+EKLorEHZxJ5PZZ2rJ3nSpebN2t15Mg+xWLmG+VJSUn64Ac/qIULF6mz8+GBIQAkmu7OQV0602xkS1YVqqgk08icmC17KPrwsM/Y6jMqRW25xUrxyZPqGx72BR4e/FmpSbKSvRMeeOLJopGwjv74G4pFI0Zeuft/VVom5/ACAAAAiG8MBgEAcIHjONo3ZhvRLSsKlZk2s1t1Oo6js2dP6sKFMw9dl56eoQ9/+MMqKipULObeG98AMJWO7qlVkqRUn1cBr6U0n0eVhRkaPNJgDACdkDvbekqSfJ5Rw717q/0CY4Z+AZ8sF89DnMvOHfiBetrHDJfX71DZknUuNQIAAACA8WMwCACAC67c6tLdriEj27WhZEY7RCIRHT68V7dv1z90XVHRPL3wwivKz5+e87YAYCo5tiPFbMWGIoqFYwr3hhS+0/fQWX6R3pCqgjF5i7LM+7/fofBMFLUkK5D08NDv/kq/+6v+2NozbjVeP6cb5w4ZWVZ+iTa88BGXGgEAAADAxDAYBADABWNXC5bmp2nZ/OwZe/7+/j7t3fuWuro6H7pu2bKVqqzcLq+XN6YBPJ3jOMPn58VsOTFnHH86cmL24/+0n3T9ox/3/vl9PU/p6pGkadpW00r2PljRN7K6z2ds72kl+2R52NYzUfV1tavm7W8bmdfn1/Y3PiWvL8mlVgAAAAAwMQwGAQCYYd39IZ253m5kuzaWztgZUHfvtmjfvncUCgWN3LIsVVZu1/LlqziPCkggjuNI9tiB22MGc/Z4BncTGeCZg7lZyWs9vKLvUUM/tvWc1exYTO/+41cVCZmr/Te9+AvKzCt2qRUAAAAATByDQQAAZtjBc82K2Q/eRfcnebRt9cy8qXj9+lUdP35Itm2eGej3J+uFF17RvHmlM9IDiCcjK97s4QHbyKDNdoYHafa965z7l8fe7t5Wls7o+ziPuM/Tbifzecfe7gkDO0yc4zjmNp4jZ/fd/9on6962nnxYAjXv/avu3L5hZOXLN2nhmq0uNQIAAACAZ8NgEACAGWTbjvafazayrauKlJoyvd+SbdvWyZPHdPXqxYeuy8rK1osv7lZmZtYj7gk82uMGXI//Wkbe3zYkS5IdtRXqCz56iPbQgG4CQ7RH3W7s440aBmJ28SR7pZQHK/paOwdVW9+lwZijIdvWUMzWro+sUsVSzlHF0zXVXdXJfT8ysrSsPG1+5ZcZGgMAAABIOAwGAQCYQedrO9TZGzKyXRund5VeKBTSgQPvqqWl6aHrSkvn6/nnX5bf75/WDnjgodVpjxukjd4e8jEDstGr2Z7tvqMGaLEnDN0edZ9JGpz0IyAheK3hLTaNPy3J63nwp2fM5Uf+OZ7beJSTl6aklCQ5XkudnQOSpMGBsN77uxpFwrGRWiXl2VqwJM+t/1UQ58LBQfV03FFvR4t6Ou6o8dqZ4Q8f3GN5PNr2wU/JnxxwsSUAAAAAPBsGgwAAzKC9Z8zh3MJ5Gaoozpy25+vp6daePT9TX1/vQ9etXr1OGzdWyeOJr3OxRg+y5DxiZdqjtny8fzvjthr5+sFKNI2538Mr0x5cL3MF2/3bjnoOY4g2dpvJsV+zOg1ueORg7nF/jm/4Zg7yHv948lgzvprKl+aXx+tRLPZgu+QTB+uNoaAkbX9pESu9oEg4qN6OO+ppb1FPR8vw1x0tGurveeL91j33IeXNWzBDLQEAAABgajEYBABghrR1D+liXYeR7dowfasFGxtu6+Ch9xSJRIzc4/Goak21FhUvVOzOgGKjzy2z759b5ki2rU6/T5Yt2TFbQ4P
hxw/IbGd4McXYAdm97JHDvtG3ZWiGRGBJsu4NvDzDf8qSefn+MMwz+na6d9sxt7uXyaNRj2feZuTyRAZ39/6UpTk//OpsG9CVcy1GtnxNkQqKM1xqBDdEIyH1dtx9MPy7Nwgc7Oua8GMVL1ih5VtenIaWAAAAADAzGAwCADCFHOf+gM2RYveHbMN/njnRoIokn3yWJZ8lpSV5tSk1WaHrnVLMvncfZ9TX9sj2jk5s1MBu1G2dmP1gy8h7X9tRW7V2sy47t4eHFqMkO0mqjC1V7jlb/edqn/rvE5ym/50wR1h6MKAaMxDzJnmHB1iWpZjtTO0QzTPqdmMHeSO305jh3SMez2PJskbdbo4P2RLR0b11o3eAlM/nUdULC90rhGkVi0bU23lXPe0tI9uA9nS0aKCnU8OfQJmcrLxCVX/gV2VZ8bXSHgAAAAAmgsEgAGDOsfvDit7tlxN9xJDtEavmRlbTjR3CjRoAjpzP9oQVb1WSqvKzjCxypFGRR9/8mcRk67xuqsFqf2gomOWkqUpLFVDyFD4jps3YodXowZlljQzVHn+7R3z92Ns/5vksy9wScgLP9bTVarm5afLe2/Lx/llwwFRquNmp23WdRra+qkzpGfwdmOhisaj6OlsfDP/urQAc6Gkf/oDOFLA8HmXkFCkrr1jF8xeoYN58lSxaqb7+6JQ8PgAAAAC4hcEgAGBOiTT2auC9m27XmBZBhXVC19Vl9T90XYmTqw1aJJ+8LjSbJqO2dTRWpFnmAM0caslcFXb/67Grwkbfd+xKNWv0fTXytTFIG91hPIO0R62OA/DMbNvRkT11Rpaa5tfGreUuNcKzsGMx9Xe3qaej5d7w7456O1rU19Umx7Gf/gDjYFkepecUKCuvWFl585SZV6ys/HnKyC6Uxzv8PXP0BxkkBoMAAAAAEhuDQQBIMLdv39bf/u3f6siRI+ro6FB2draqq6v1H//jf9TixYvdrhf3Qu+3u11hWnRrQDW6pqAVfui6FU6ZlqpE1v0lhPe2d7TurwTzeh6sCrv39fCZZh4lB5Lk8XnkWFI4EnvyQOz+EGzUyrKRgd3o1WaPuu2jBm+jt3E0hnmj7gsAj3D+ZKM628yVqJU7K5Tkn0UfjphFbNvWQE+7MfzraW9RX1erbDs2Rc9iKT07f3gAmD9PmXnzlJVXrIycQnl9SVP0HAAAAAAQ/xgMAkACOXnypP79v//3GhgYUHl5uV588UU1NTXpRz/6kd5991199atfVVVVlds149r/z959h0dV5X8c/8xMeu8Jofem9I5IWXvHirq6rh3LrosuKqIgNlx1ZX+uylqwoIJdEUE6SFF679KTEEjvbcrvj0mGDOnJTALM+/U8eTJz65nJyT333u8932MK9pVZOY2+X6vNJrMks80ms03y9/eSr5+3UxDOYDKUvi8NzpkMkvHU9FPBPOdlj6YnaO3e3bKcdvPUy+SlIf2GqVXLNs77qUNvNNI9AjjbFBeZtXzBPqdpEdGB6nJ+XBOVCGVsNqvystLtPQDTjis71T4GYE76SVksrkusHRgS4RT8C4lsppCIWHl5+7hsHwAAWYwntgAAxaZJREFUAABwtiIwCABnicLCQj322GPKy8vTPffcoyeeeEJGo1GS9OOPP2r8+PEaN26cFi5cqICAgCYu7ZnLr08zyWCQOSWvtDdbWRDOWNpjzh48qyww5xSwKwuwOQXsyve4O7XsrGV/aPGmREcZQgK89fpfhsrLZGzQZ7HZbNq6daO27d5UYV5gYJBGjbpU4eGRDdoHAJxt1iw7oLycIqdpQ0a1k5EUvY3GZrMpPydDWanHlZ2WXBoITFZ2WrIs5oo92+srIDjcnvozsllpIDBOIRFx8vZhHEkAAAAAqAqBQQA4SyxcuFApKSlq06aNHn/8cUdQUJKuvfZaLVmyRAsWLND333+v22+/vQlLemYzeBnl3z++0fZXVGzR6p0nnKYN6xnf4KBgSUmJVq9epqNHD1eYFxM
TpxEjLpafn3+D9gEAZ5vsrEKtWX7AaVrLduFq2TaiiUp0brPZbCrIzbKn/kxLLk0FelzZ6ckyFxfVvIFa8gsMcQr+lY0F6ONLOwcAAAAAdUVgEADOEjt27JAk9e/fXyZTxTGSBg0apAULFmjx4sUEBs8ga3efUEGR2fHeIGl4z4YFJnNzc7Rs2QJlZKRXmNexYxcNGDC00joCAOe6FQv2yVxidbw3GKTBIxl/t6FsNpuK8nPsPf9K03+W9QQsKSpw2X58/YOcUoCGRjVTSEScfP0DXbYPAAAAAPB0BAYBoBq5ublavXq11q5dq127dunw4cPKycmRr6+vYmJi1KNHD1111VUaNmyYDAb3pigrKLDfeAsNDa10flhYmCRp165dbi0H6mb55kSn9+e3j1RUWP17OJw4kawVKxaqsLDQabrBYFC/foPVpUt3t9dFADjTlJRYlHQkU9s2JjhN79KjmSKjCSrVRVFBbmnPv9IAYOnr4kLXjTPr4xfg6PUXGtXM8dovINhl+wAAAAAAVI7AIABU4aOPPtKbb76poqKKqbDMZrMOHTqkQ4cO6ccff1S/fv302muvKT7efSkqIyLsadASEhIqnV82PTMzU3l5eQoM5EZoUzt0PFuHk3Ocpo3o1bze29u/f4/Wrl0lq9XqNN3Hx1cXXvgnxce3qPe2AZvNJpvttNelvyubZyudcGrZ8sudtp7NptJfjm0WF1oUGuYv/wDvxv6oOMsVFZqVeiK39CdHKSdylZmW76ijZbx9TBowrE2TlPFMZLVaZbWYZTGX2H9bzCrIzXIK/mWlHVdRfk7NG6slbx+/0uBfvEIj4+w9AaOayS8gmIdYAAAAAKCJEBgEgCocOnTIERSMjY3VkCFD1L17d0VGRqqoqEhbtmzRnDlzlJ+frw0bNuiOO+7QV199pcjISLeUZ+DAgZo+fbpWrFihlJQURUdHO+aZzWZ9++23jvcEBs8Mp/cWjAjxVY/2da8fVqtVGzf+rt27d1SYFxoappEjL1VISOU9SeFeJSUWZablKz0lT+mp+crOLJDVYrMHxCSnYNnpgbRT70tf2057XS64Zn9ffrmqA26n9l3dfirOayoBgT4KjwpQRHSgIqMDFREdqIioQHn7kA4XUn5esSMImJKco9QTucrOLKx5RUm9B7ZUQJCPm0tYM5vNKovZXCEoZ7GYZTWXOP8uXcY+rXQZS8mp9U/bxunrO08rt77FLNtpD5W4kpe3b+nYf6eCf6GRcfIPCiMACAAAAABnGAKDAFAFg8GgCy64QHfffbcGDx4so9HoNH/06NG6//77dc899+jQoUNKSEjQ66+/rldeeaXCtsaPH69t27bVaf8XX3yxHn/8ccf7wYMHq1evXtqyZYvuvvtuTZo0SV27dlViYqLeeOMNJSaeCkKdXlY0vvzCEq3ddcJp2vCe8TIa63aDtKioSL/+uljHjydWmNe8eUsNG/Yn+fg0/Y3vc525xKKMtHxlpOYrPdUeBMxIzat1gAJVy88rVn5esRKPZDpNDw33twcJSwOGkTGBCgnzr/P/EM4ONptNudlFTgHA1BO5ysstrtf2wiL8dX7/eHuQ7bSgm/13WaCt5FQwrsK00wJ1Vax/KhhnKf3tvIzVanHxt9V0TF7eComILRf8swcAA0LCZTBw7gEAAAAAZwODzdaUz4gDwJkrMzPTMW5fdfbs2aNrr71WkuTv76/ffvtN/v7OY8jdcccdWrduXZ32P3r0aE2dOtVp2smTJzV27Fjt2OHcc8zX11cTJ07Us88+K4PBoG3btjVJsCgjI19m87lzA7QhFm84pi8W73e8NxkNeu2hIQoL8q31NrKyMrV06QLl5GRVmNetWw/16TPA7UHgiIhAmUxGWSxWpae7bnypM5XFbFVGmj34VxYEzCjtCcgZU9Pz8jKW610YpMgYe+AwIJDg+NnEZrMpM73AkQrUHgzMVVGhuR5bM8vLkCmTIUPexnT5eGXJoCIZDPa0mag
fo8mkkIi40l6A9uBfaFQzBYREeuTDR57WFqLxULfgDtSrc5eXl0nh4QFNXQwAwDmAHoMAUIXaBAUlqUuXLmrbtq0OHTqkgoICHTlyRF26dHFaZubMmS4pU0xMjL7++mstX75c69evV15enpo3b64rrrhCFos9INe6dWt6kDUxm82mZaelEe3dMapOQcHExGP69dclKilx7i1jNBo1ePCFat++k0vK6qksFqsy0wuUkZrnSAOakZqnrAwCgNWzSrLKIIvzb4NVkkWGKqad+m2VwWCxz7NZZbX5yWwLk8UWJptq/v8wm61KSbYHkaRTPXL9ArztvQrLehjGBCo8KlDe3qQjbWoWi1UZqfmn0oGeyFHayTyVFNfnIRKbTIZsmQzp8jZmyNc7S7JkyJ5ot5S1NO2ui8p/rjMYjQoOj7UH/krHAgyJjFNQWJSMRv5/AAAAAOBcRGAQAFwgKCjI8bpsXEJ3MRqNGjVqlEaNGuU0/bvvvpMkDRo0yK37R832HcvU8bR8p2kjezev1bo2m027d2/Xxo1rdXqnfj8/f40ceYmio2NdVtZzncViVVZGgb33X0qeowdgVkaBrFbXhQ5CwvwUERWo8KgA+fjaT68MBntKYvvwWgbHexlqmKfyyxkkWWWzWWWzmmWzmksDambZLOVeWy32+TaLrBaLbDaLY3mr1SKbxSyrrfS31b68tZLXjmkWiyxW+z4cY5lZzW4djNBgCpJVYSooCpbFFiazLUw2+cv+jVSvML9EiUcyq0xHWj5gSDpS9zGXWJSWkqeU5FxHb8C0lDxZLfWpNzYZlS+TIUM+Xhny88mSLOmyWUtOLXKWd1A3GAwymrxl8vKS0eQlk5e3/bfp1HuTyUtGr7JpZe/tv+3rnbZM2TzTadss24fJ+7Rp3h7ZAxAAAAAAPBmBQQBooOLiYh0+fNjxPj4+vtHLYLFYNHPmTBkMBo0ZM6bR9w9np/cWjI0IUJfW4TWuZ7FY9PvvK3XgwL4K8yIiojRy5CUKDAyqZE1YrbbSAOCp3n9pKXnKSndtADA41E8RUQEKj7IHmiKiAhQWGSCruVBZqUnKyUiWxVx8aswxs8U+3tjpY5dZzLKa7b/Lxiw7fbq1dOwym9XqsvKfyWyWXBmUq4ByZ6dGk5/kFaGi4lAVlQTLbA2TVUGqTbBQkrIyCpSVUaBD+1Id0+zpSJ2DhaQjrbuiQrPSTuY6goApJ3KUmZZf79ixQUXyMmTIxztL/r5ZkiVVVnOBY76tpJqV67qvsoBcJQG1ssCcycu7hoCcPcjmFHQrv41KAnIVptMjDwAAAADQBAgMAkADzZ07Vzk5OZKk7t27Kzo62m372rdvn1q2bOk0hmFubq4mT56sXbt26bbbblPXrl3dtv+ahIT4Ndm+zxSZuUXatC/FadqVQ9ooMrL6gF5eXp7mzp2n48ePV5jXsWMnXXLJJfL29nZpWWujrGeV0WhQRERgo+//dFarTZlp+Uo5kaOU0h5JKSdylXYyTxaL6wJoIWF+io4NVnRskKLiSn/HBMlotCkjJUnpJxKVdmK39q9PVHpygvJyMl22b5xitRRKliR5K0nepWetRpOPvPyiZLGGKTc/UMXmEFlsIZJq1+vJno40RynJOU7TA4N8FB0XrJi4YEU3K/0dGyxvH4I3eblFSk7MVnJilk4kZSs5MVsZp/WKrhuLvAyZCvDLlr9ftmRJVUlhpmOutY4d7w0GgyJimyumeVvFtmqnyJjmMnl5y1AW5PP2lpcjqOcto4m/KermTGsLce6gbsEdqFcAAKAmBAYBoAHS09P1+uuvO96PHTvWrfubMWOGFixYoO7duysmJkY5OTnatGmTcnNzddlll2nChAlu3X9NTCbSkS3dcEzmcmnzfLyMumhg62q/mxMnTujHH390BJjLGzp0qAYOHFiaUrLpGAwGmUyNVwab1aaM9Hx7AOdEriOQk3oyV2azCwOAoX6Kjgu2/8QGlf4Olo+PUdmZqUpLTlB
a8gHtXJOgtOQEZaad8JgefHVhNJpk8irtMeVVmq7Q+1TvqrLpXl7ejl5VMhiUmZKs1ORjsphr3x3MailWcV6SpCT5GyR/b8lgNMk3IFoyRaiwKEjZuQEyW0NVl1PdvNxi5f2RpsN/pJ2aaJAiIgMV0yxYMc2CFdssRDHNghUeGXhOpiO12WzKzizU8YQsJSdm6XhilpITs5WTVdiQrcpkyFZwUK4C/HJksKaqMDdFNptVskgleXXfYnB4lGJbtFVsy3aKbdFO0c1by9un9mO4AvXV2G0hPAd1C+5AvQIAAFUhMAgA9VRcXKxHH31UaWn2m8gXXXSRLr74Yrfu86KLLlJqaqr27t2rLVu2KDAwUD179tRNN92kyy+/3K37rg1X9tg6G1mtNv3y22GnaUN7xivA16vK72b//n1asGCBzGaz03Rvb29deull6tChQ2kqTPeN7VYdo9Egg8Egm83m0pScZWxWm7IyC5x6/zkCgCWuq09Bwb6KjgtSVOypAGBUTJD8/L1VkJdT2gNwj/auT9CaE4nKOJmokmL3jhfaYAaDvErTEnqVBt6MZcE3k/ep116nUhyeCtJ5VzLt1PhjztO8SoN83hWCfI7UiPUYo6ysblnMZqWdTFJq0lGlHj/q+F1cVFDzRkrZrBYV5iZLSpYkhXjZvx//wEiZfKNVYglVTq6/8vKDZFMdUobapPRU+9iUe7YnOyZ7eRvtPUrjghXbzP47plmwAoPOnuCUzWpTelqekhOzS3sB2oOABfkNydlpk9FQoPDQfAX424OABTn29Loqkgrr8S/lFxCkmBZtFdO8rWJatFF087YKCAqpsFzZMdbdxyx4JuoV3IW6BXegXp3beBgXAOAKBputviOBAIDnslqtGj9+vH766SdJUqtWrfTNN98oNDS0iUvWtDIy8mU2W5q6GE1m24E0Tft6q9O0Z+7sq/bxFeuFzWbT1q0btW3bpgrzAgODNHLkpYqIiHRbWWsrIiJQJpNRFotV6en16NpTymazKTe7SOmpecpIzXf67coAoH+gtyKiAhURFajwqAD76+gA+fp5y2IuUVZasrJSk5SVelxZqUnKTE1SYV62S/ZtNJkUHB4rX//ACmOPVRx/zHncsbKxzCqMdeZ1aswzY2lgrmy8M4PR2OQ9SRuiurpls1mVl5WmjJMJyjiZqMyTCco4eUyF+RV71daVb0CYvP2jZVO4CoqClZXtrxKzj2o7bmF1/AO8HeMWRkYHKSLaXg+9vZs2daXFYrWn4C03HmDayTyVFDfseG0yligsLF8Bfjky2tJUmJesksLc+m/P5K3w2JaKiGulyLjWiohrrcDQyDrVc1cds4DyqFdwF+oW3IF6de7y8jIpPDygqYsBADgH0GMQAOrIZrNp0qRJjqBgfHy8PvroI48PCkJavjnR6X2rmCC1a1axZ0tJSYlWr16uo0cPVZgXExOn4cMvdhpH8mxis9mUl1usjNQ8pafkKb1cELChQYjy/Py9FREdUBoADFREVIAiogPl5+/tCCplph5XeuI2HdqapMyUJOVmpshVz0MFhkQoNCpeYdHxCo2KV2hUMwWHxTB2mYsYDEYFhUUrKCxaLTv1dkwvyM0qDRYm2IOFKQnKy0qrZksVFeVnqig/0/E+2Cj5hATJLzhWRq9IFZtDlJMbqKwsk+oaLCzIL1HikUwlHsl0mh4a7l8uYBioiOhAhYT5uyUdqbnEorSUPKWeKA0CJucqPSVXFkvD6r6Xl03hEUUK9MuWUekqyktWQU6abLlSXj1igQaDQSGRzZyCgKGRzfgfAgAAAADAzQgMAkAd2Gw2TZ48WV999ZUkKS4uTp988olatGjRxCVDU0vLKtTWA6lO00b0aV6hp0tubq6WLVugjIyKwYwOHbpo4MChMp0FN8ZtNpvy84rtvf5KA4AZpSkXi4tcFwD09fOyB/9Kg4ARUQEKjwpUQKA9HWRRQa4yU5KUlbpPCXvsPQGz047LXFLskv37+AW
UBv7iFRbVzBEE9Pbxc8n2UTf+QaHyDwpVfLvujmnFhfn2QGFKgjJO2IOFOekn6hQELi7MVXFhrqQDkiSTpJggPwWGxsnbL1oWhSm/IEgZGd4qLKh7/c7KKFBWRoEO7Tt1jPDyMiqiNEgYEX0qYFhWt2tV7iJzaQ/AXKUm5yr1ZK4yUvPU0Pi3j49REZEWBQZky1QaBMzNTJYl06r69q8NCIlQZFwrRZQGAcNjWjIuIAAAAAAATYDAIADUks1m0/PPP6/Zs2dLkmJjY/Xpp5+qVatWTVwynAl+3ZrkdDPez8ekQd1inZY5eTJZy5cvVGFhodN0g8Ggfv0Gq0uX7mdUakibzSaL2aqC/BKdSMrSkYPpjvHWMlLzVVRornkjteTj61WuB2CAIx2of6C3DAaDzCXFyk5PVlbqPu09nORIB+rKNKAhEXGlAUB78C8surn8AkPOqL8JKvLxC1Bsq06KbdXJMc1cUqzMlER7sPBkojJOHlNWapKsltoH9czFhcpKOSzpsGNasMlb8c3j5BsYI5kiVFQSouwcP2WkFctirltKXLPZqpPHc3TyuHN6VP9Ab0eQsHw60pJii6MXYNlPVkbtx2Gsin+AlyKjjAoMzJHJlqbi/BPKTktUcVqRiuvWGdPBxzdAEaVBwMhmrRUR20p+gRV7TwMAAAAAgMZHYBAAaqEsKDhr1ixJUkxMjD799FO1bt26iUuGM4HZYtWv25Kcpg0+L05+Pqea2f3792jt2lWyWp2DBz4+Prpg6CjFxMSrsMAsi8Uqq8Uqi8VW+tv+2mK2ls6zOf22ON5Xsk755czO61hrua6refuYHL3+IqJLU4BGBSogyEcGg0E2m1W5mWnKSj2qQ9uTlFk6FqBL04CGRpYG/0oDgFHxCgqPltF45vfURO14efsoKr6touLbOqZZLRZlpyc7pyI9mSBzSVGtt2uxlCgz5ZiUcswxzWAwqmV0jILCmsnoEyWzNUz5BYHKSDPXK3BXkFeihLxMJRzOrPO6NQkO9VNklLeCAnNkUrqKC04oKzVB+cnZyq/nNo0mL4XHtLAHAUt7AwaFRRFQBwAAAADgDGWwueouGwCco04PCkZHR2vmzJlq27ZtDWt6noyMfJnNrksj6S42m005WYWlwbayYFkNQTezTVar1WmdsuVOpudr79EMGWSQQZJRUru4YHmZjLJYLCq2JchiOlmhHFazjwrTmstmqX3qwLOFt49J4ZHlegCWBgEDg30dAYPC/BxlpR5XZmqislLsAcCstGRZzK5KAxpYLv2nPQhIGtAzR0REoEwmoywWq9LT85qkDPZAdGqFYGFRQT0GzTtNYGikQqOayzcwRjZDhAqLg5WVKaWl5Kkwv6Thha9BWGSAoqL9FBSUL5MhXSUFJ5WVckw5GRWPRbVnUGhknD0daLPWioxrpdDI+DNqXMAzoV7h3EO9grtQt+AO1Ktzl5eXSeHhAU1dDADAOYAegwBQgylTpjgFBT/99FOCgmex9NQ8zft6h3KyCmteuA4iZXR6n5acKxks8g1Pkpdfxb445sJAFWU0k2xnzg31+vDyNio8snTsv3I9AINCTgUAzSXFyk5L1smj+xwpQLNSk1SYn1PD1mvHaPJSSGTcqRSgpYFA0oCiJgaDUcHhMQoOj1Grzn0k2R8cKMjNdAoUZpxMUH5ORp22nZeVprws51ycfoEhat+yhb13oXekSqyhys72so/VmZpf53SkkmQ0GhQRFajI2ECFBBfJy5ihkoITyjh5TJmHE5Vurf/DGgHB4eV6ArZSeGxLAusAAAAAAJzlCAwCQDVeeOEFffHFF5JOBQXbtWvXxKVCQ6xfedjlQcHKGEzF8otMkNGrYs+gktxwFWdHSzp7glZeXkaFRQacSgNa2gswONTPEXyzWq3Ky0pV1sm9OrLr1DiArksDalBQaOSpFKDR9gBgUFgUaUDhMgaDQQHB4QoIDlfz9uc7phcV5NnHLDxRFiw8ppyMFEm1r9uFedk
6fmiXpF2Oad6+/oqMaaH2bZrLJyBWNkO48gr8lJFaoLSTucrOPHW88vIyKjImUJGxQQoLs9mDgPknlHFyq9IPHVVKce3Top7O29ffkQq0bHxAf8YFBAAAAADgnENgEACq8Oabb+qzzz6TZL9RfOedd+rgwYM6ePBgtet169ZN8fHxjVFE1IPRZKx5oQYy+ebJNzxJBqNz7x+bzaDizFiZC0JrtR2DQTKZjDKaDKW/jTKVvi4/3eRlKJ1nn19+uZrXNcpoNMjkVfk6kRGB8vH1UkCQjzIzT/V8tKcBParjB+wBwMzU48pOOy6L2TUpEn39A51SgIZFxSskspm8fXxdsn2grnz9AxXbqrNiW3V2TCspLlJWauKpYGFKgrJTj8tah156JUUFOnlsv04e2++YZvLyVmhUvDp3aKGQyOYy+UTKZilWUX6yMk7sUdrhI0rOy673Zyk/LmBEXCtFxrVWUFg0PWwBAAAAAPAABAYBoAqbNm1yvLbZbHrjjTdqtd4rr7yi66+/3l3FQgMNGt5W2RkFOnncnsbSYFBpcMweYKsQODNVHXSTQfpt1wkVW6yySrLJpo7Ni2SwJVTYr7e3r7p3Hqzw8KiKgTov5/04AnNG+016e287m/23zb4f2WzleuHZZF/Eau+7VDrdab2y91XMs2/PKslSuoh9nkEWFeYW6dj+Y0o6ctgRBCxyURpQk8lbIZGxCo1u7jQeoF9AMEEKnPG8fXwVFd9OUfGnepJbLGZlpx0v7VWYqIyTx5SVkihzSe3HzrSYS5SefETpyUdcUEqDQiJjS1OC2nsChkbFy2TiMgAAAAAAAE/EHQEAgEfxDzCqQ9tjCvH+Qzar1RFkk04Fzkrf2V9bbJLFPs8myWyzymxfWIXFFjU3FktGySCbFBQjg61i6j2jpUg+uUf0x2+7SrdfGuArDc45BftK3zsF7c4JBgWFRZUbA7BZaRrQaBmN7u/FCTQWk8lL4TEtFR7T0jHNarUqNzPltHELj6m4sOL4ow3lHxTmGBMwIq61ImJbytvX3+X7AQAAAAAAZycCgwBQhZkzZzZ1EeAG29f8rH0bl7lse/4GyWYwqSSklWzegRXmG4uy5JWTIHMdxiE72/n6B1UYBzA0Mk5e3qQBhWcyGo0KiYhVSESsWnfpK8n+YEB+Tka5QKH9pyA3s9bb9fb1V0RsK6fegP5BtUtVDAAAAAAAPBOBQQCAR8lJP+nS7VlNfioJaSWZfCrMM+WdkKkgRedqQkyTl7dCIps5pQANi2omv8CKvSYBODMYDAoMiVBgSISad+jhmF6Yn1MhWJibmSKjyaSw6BalvQHtPQKDw6NlMNDjFgAAAAAA1B6BQQCAR2nfY4iOH9rpkm1ZfEJkDm4hnX5j3maVV06CTMXZLtlPUzMYDAqJiFFweJwjBWhYdLwCQ6NIAwq4mF9AsOLadFVcm66OaRZziQxGo4xGUxOWDAAAAAAAnAsIDAIAPErz9ufrynsmKSXxgH1sQYOhtEefQQaD4dR7Q+n7SuYVlVj0w/JtigosqrB9P19f9T7vPIUEj7Kv6VjPoNINO7Zr/1227bJ5kgxGR5lksC9nX//0MpVbtrJtltu//bWx3DLl55Vuq9x6ZfuKiAiSl5dJVqtN6el59f/iAdSbycu7qYsAAAAAAADOEQQGAQAeJyg0UkGhkfVat6SkRD/8PL/SoGBMTJyGD79Y/v7+DS3iGcNoNJYGCz1njEQAAAAAAADgXEVgEACAWsrNzdWyZQtUkJ1WYV6HDp01cOAFMplI9QcAAAAAAADgzERgEACAWjh5MlnLly9SYWGB03SbTWrZrocGDx5YLs0nAAAAAAAAAJx5CAwCAFCDP/7Yq99/Xymr1eo0vcRqVGJxS91xAUFBAAAAAAAAAGc+AoMAAFTBarVq48a12r17e4V5+SXe2p7eTFcN6yIjQUEAAAAAAAAAZwECgwAAVKK4uEi//rpESUkJFealFwZoV0asZPTS0B7NmqB
0AAAAAAAAAFB3BAYBADhNdnamli5doOzsrArzjuWG6WB2pGwyaFDnGIUE+DRBCQEAAAAAAACg7ggMAgBQTlJSgn79dbGKi4udphsMRu1Oj1JyQYhj2ojezRu7eAAAAAAAAABQbwQGAQCQZLPZtGfPDm3Y8LtsNpvTPD8/f6UZ2iu5oNAxrXl0oDq2CG3sYgIAAAAAAABAvRmbugAAADQ1i8Wi3377VevX/1YhKBgeHqmhw6/QxoNFTtNH9Goug8HQmMUEAAAAAAAAgAahxyAAwKMVFBRoxYpFOnkyucK81q3basiQEZq/NkHWcgFDX2+ThpwX15jFBAAAAAAAAIAGIzAIAPBY6elpWrZsgfLycivM69mzr3r06COrzaYVW5Oc5g3sFit/X5pQAAAAAAAAAGcX7moCADzSkSOHtHr1MpnNZqfpXl5eGjp0hFq3bidJ2rY/VRk5zmlER/Zu3mjlBAAAAAAAAABXITAIAPAoNptN27dv1pYtGyrMCwgI1KhRlyoiIsoxbdmWRKdl2jYLUeu4YLeXEwAAAAAAAABcjcAgAMCj/PHH3kqDgtHRsRox4mL5+wc4pp3MLNDOg+lOy9FbEAAAAAAAAMDZisAgAMCjHDlyqMK0Dh06a+DAC2QymZymr9iSKFu59wG+XurfNcbNJQQAAAAAAAAA9zA2dQEAAGhMMTGxjtcGg0H9+g3W4MEXVggKlpitWrn1uNO0oec3k6+383IAAAAAAAAAcLagxyAAwKOcf35v+fr6KTs7U+3adVRkZHSly23cd1K5BSVO00b0jm+MIgIAAAAAAACAWxAYBAB4FIPBoM6du9W43PJNiU7vu7QKU7PIQHcVCwAAAAAAAADcjlSiAACcJjElV/sSspymjejdvIlKAwAAAAAAAACuQWAQAIDTLN+c5PQ+JNBHfTpVnnIUAAAAAAAAAM4WBAYBACinqNiiNTuPO00b1qOZvEw0mQAAAAAAAADObtzlBACgnLW7T6igyOJ4b5A0vFd80xUIAAAAAAAAAFyEwCAAAOUs25zo9P789pGKCvVvotIAAAAAAAAAgOsQGAQAoNSh49k6kpzjNG1k7+ZNVBoAAAAAAAAAcC0CgwAAlDq9t2BkiJ/ObxfZRKUBAAAAAAAAANciMAgAgKS8whKt23XCadrwXvEyGg1NVCIAAAAAAAAAcC0CgwAASFqzPVnFZqvjvclo0LAezZqwRAAAAAAAAADgWgQGAQAez2azafkW5zSifTpFKzTIt4lKBAAAAAAAAACuR2AQAODx9h7N1PG0fKdpI3o3b6LSAAAAAAAAAIB7EBgEAHi803sLxkUEqEursKYpDAAAAAAAAAC4CYFBAIBHy8or1sa9KU7TRvRuLoPB0EQlAgAAAAAAAAD3IDAIAPBoq7YlyWK1Od77eBk19Py4JiwRAAAAAAAAALgHgUEAgMeyWm1avjnJadqArrEK9PNuohIBAAAAAAAAgPsQGAQAeKwdh9KUll3oNG1E7+ZNVBoAAAAAAAAAcC8CgwAAj7VsU6LT+9axwWrbLLiJSgMAAAAAAAAA7kVgEADgkVKzCrTtQJrTtBG942UwGJqoRAAAAAAAAADgXgQGAQAe6detSbKVe+/va9LAbrFNVh4AAAAAAAAAcDcCgwAAj2O2WPXr1uNO0wZ3j5Ofj1cTlQgAAAAAAAAA3I/AIADA42zen6rsvGKnaSN6N2+i0gAAAAAAAABA4yAwCADwOMs3Jzq979giVC2ig5qoNAAAAAAAAADQOAgMAgA8yvG0PO0+kuE0bSS9BQEAAAAAAAB4AAKDAACPsvK0sQWD/L3Vt3NME5UGAAAAAAAAABoPgUEAgEdJTM1zej+sRzN5e9EcAgAAAAAAADj3cScUAOBRurYOd7wODvDWn/q2aMLSAAAAAAAAAEDj8WrqAgAA0Jgu7t9CESG+SkrNU/+usYoI8WvqIgEAAAAAAABAoyAwCADwKCajUQO6xjZ1MQAAAAAAAACg0ZF
KFAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD0BgEAAAAAAAAAAAAPAABAYBAAAAAAAAAAAAD+DV1AUAAJw7TCaeNzmXeXmZmroIOEdRt+AO1Cu4A/UK7kLdgjtQr84tXG8DAFzFYLPZbE1dCAAAAAAAAAAAAADuxaMmAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAfwauoCAACAs9POnTu1Zs0abd++XTt27FBiYqIkacmSJWrRokUTlw5nqy+//FK//fab9u7dq7S0NOXl5Sk0NFTnn3++xowZo5EjRzZ1EXEWeuqpp/T9999XOf+WW27RlClTGrFEOBesXbtWd955Z43LDRw4UJ9++mkjlAjnkqNHj+rdd9/VmjVrlJaWprCwMA0cOFAPPfSQ2rdv39TFwxmqvufnnNcDAOBZCAwCAIB6efvtt7VkyZKmLgbOMR999JGOHTumTp06qU+fPvLz89OxY8e0fPlyLV++XHfffbeefPLJpi4mzlIXXHCBoqOjK0zv3bt3E5QGZ7uoqCiNHj26yvmLFi1Sbm6uBgwY0Iilwrlgw4YNuv/++5WXl6dWrVpp5MiRSkxM1Ny5c7V48WK9//771CtUqr7n55zXAwDgWQgMAgCAeunVq5c6deqk8847T+eff76uv/56paamNnWxcJZ75ZVX1KlTJwUGBjpN37Bhg+677z7NmDFDl112mXr27NlEJcTZ7P7779fAgQObuhg4R7Rv315Tp06tdN7x48f1448/ymAw6Nprr23kkuFsVlhYqMcee0x5eXm655579MQTT8hotI8C8+OPP2r8+PEaN26cFi5cqICAgCYuLc409T0/57weAADPQmAQAADUy/3339/URcA5qKqeW/369dPll1+ub7/9Vr/99huBQQBntB9//FFWq1X9+/dXy5Ytm7o4OIssXLhQKSkpatOmjR5//HFHUFCSrr32Wi1ZskQLFizQ999/r9tvv70JS4ozUX3PzzmvBwDAsxhrXgQAAABoel5e9mfafHx8mrgkAFC9H374QZJ03XXXNWk5cPbZsWOHJKl///4ymUwV5g8aNEiStHjx4kYtFwAAAM4d9BgEAMDD5ObmavXq1Vq7dq127dqlw4cPKycnR76+voqJiVGPHj101VVXadiwYTIYDE1
dXJwl3F2vdu/erfnz58tkMmnYsGFu+AQ4E7m6Xi1atEiLFi1ScXGxmjVrpqFDh6pHjx6N8ElwpnHnMWvr1q06dOiQ/P39ddlll7npE+BM5Ip6VVBQIEkKDQ2tdH5YWJgkadeuXW75DGganJ8DAIDGRGAQAAAP8tFHH+nNN99UUVFRhXlms1mHDh3SoUOH9OOPP6pfv3567bXXFB8f3wQlxdnEHfXq22+/1fr161VSUqLExERt2bJFXl5emjx5sjp27Oiuj4IziDvq1cyZM53eT5s2TcOHD9e//vUvx812nPvc3RaW9Ra8+OKLFRQU5Kpi4wznqnoVEREhSUpISKh0P2XTMzMzlZeXV2FMXpx9OD8HAACNjcAgAAAe5NChQ46bDrGxsRoyZIi6d++uyMhIFRUVacuWLZozZ47y8/O1YcMG3XHHHfrqq68UGRnZxCXHmcwd9WrTpk36/vvvHe/9/f01YcIE3XDDDW7/PDgzuLJedenSRZMmTdKgQYPUrFkzpaena926dfr3v/+tFStW6MEHH9QXX3zhNJYXzl3ubAuLi4s1b948SdLo0aPd+jlwZnFVvRo4cKCmT5+uFStWKCUlRdHR0Y55ZrNZ3377reM9gcFzA+fnAACgsRlsNputqQsBAAAax6RJk5SQkKC7775bgwcPrvQmeGJiou655x4dOnRIknT99dfrlVdeqXHbQ4cOVWpqqpYsWaIWLVq4vOw4c7mzXuXn5+vIkSOaOXOmvv32Ww0bNkz//e9/5efn5/LPgTOLO+tVmRMnTuiaa65RZmampk2bpssvv9xl5ceZy511a8GCBfrb3/6muLg4LVu2jGCzB3FVvbLZbBozZoy2bNmiTp06adKkSeratasSExP1xhtvaPXq1SopKZEkrV69WlFRUe7/cHCrM/H8nPN6AADObQQGAQDwIJmZmbVKl7dnzx5de+21kuw9tX777Tf5+/tXuw43EDyXO+tVec8884y++eYbPfbYYxo7dmx9i4uzRGPVq1dffVUzZsyoc1ARZy931q2xY8dq6dKleuCBBzRu3DhXFBdnCVfWq5MnT2rs2LHasWOH03RfX19NnDhRzz77rAwGg7Zt2yYfHx+XfQY0jTPx/JzzegAAzm08vggAgAep7RhaXbp0Udu2bSVJBQUFOnLkiBtLhbNdY9Wr6667TpK0ZMmSOq2Hs1Nj1as2bdpIst+Ih2dwV91KT0/XypUrJZ06XsFzuLJexcTE6Ouvv9a7776ru+++W7fccovGjRunn3/+WQMGDJAktW7dmqDgOYLzcwAA0NgYYxAAAFQqKCjI8bps3BOgoRpSryIiIiTZb74D5TWkXmVlZUlSnXoZwnPUpW7NnTtXJSUl6tWrl9q1a+fuouEsVpt6ZTQaNWrUKI0aNcpp+nfffSdJGjRokPsKiDMW5+cAAMAV6DEIAAAqKC4u1uHDhx3v4+Pjm64wOGc0tF6tXbtWkr2XBFCmIfXKZrNp4cKFkqTzzjvP1UXDWa6udeuHH36QRG9BVK8hxyyLxaKZM2fKYDBozJgxbigdzmScnwMAAFchMAgAACqYO3eucnJyJEndu3dXdHR0E5cI54Ka6tWOHTu0aNEimc3mCusuW7ZM06ZNkyTddNNNbi8rzh411atdu3bpp59+UnFxsdP03NxcTZw4Udu3b1dAQIBuuOGGRiszzg51aQv379+vnTt3ysfHR1dccUVjFRFnodrUq3379qmgoMBpWm5urp588knt2rVLt956q7p27doo5cWZg/NzAADgKqQSBQAATtLT0/X666873o8dO7bS5ZYvX6533nnH8b4sHd8jjzziGPNm+PDhevjhh91YWpwtalOvkpOT9cgjjygkJETdu3dXZGSkcnJydOjQIR09elSSdPfdd3PTHQ61qVdJSUl64okn9MILL+i8885TeHi4UlNTtXv3bmVlZSkgIEDTpk3jBiuc1LYtLPP9999LkkaNGqXQ0FC3lg1nr9rWqxkzZmj
BggXq3r27YmJilJOTo02bNik3N1eXXXaZJkyY0FhFxhnC3efnnNcDAOBZCAwCAACH4uJiPfroo0pLS5MkXXTRRbr44osrXTY9PV1bt26tMH337t2O14yxBKn29er888/XI488onXr1unQoUPauHGjjEajYmJidO211+rmm29Wv379Grv4OEPVtl517txZd9xxh7Zv3659+/YpMzNT3t7eat68ua677jrdeeedatGiRWMXH2ewurSFkj29408//SRJGj16dKOUEWefutSriy66SKmpqdq7d6+2bNmiwMBA9ezZUzfddJMuv/zyxiw2zgCNcX7OeT0AAJ7FYLPZbE1dCAAA0PSsVqvGjx/vuLnZqlUrffPNN/R8QINQr+AO1Cu4C3UL7kC9Qn1RdwAAgDswxiAAAJDNZtOkSZMcNx3i4+P10UcfcdMBDUK9gjtQr+Au1C24A/UK9UXdAQAA7kJgEAAAD2ez2TR58mR99dVXkqS4uDh98sknpNZDg1Cv4A7UK7gLdQvuQL1CfVF3AACAOxEYBADAg9lsNj3//POaPXu2JCk2NlaffvqpWrVq1cQlw9mMegV3oF7BXahbcAfqFeqLugMAANyNwCAAAB6q7KbDrFmzJEkxMTH69NNP1bp16yYuGc5m1Cu4A/UK7kLdgjtQr1Bf1B0AANAYCAwCAOCBTr/pEB0drU8//VRt2rRp2oLhrEa9gjtQr+Au1C24A/UK9UXdAQAAjYXAIAAAHmjKlCkVbjq0bdu2iUuFsx31Cu5AvYK7ULfgDtQr1Bd1BwAANBYCgwAAeJgXXnhBX3zxhaRTNx3atWvXxKXC2Y56BXegXsFdqFtwB+oV6ou6AwAAGpPBZrPZmroQAACgcbz55puaPn26JMlgMGjcuHG1uunQrVs3xcfHu7t4OEtRr+AO1Cu4C3UL7kC9Qn1RdwAAQGMjMAgAgAe54447tG7dujqv98orr+j66693Q4lwLqBewR2oV3AX6hbcgXqF+qLuAACAxkYqUQAAAAAAAAAAAMAD0GMQAAAAAAAAAAAA8AD0GAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAMQGAQAAAAAAAAAAAA8AIFBAAAAAAAAAAAAwAN4NXUBAAAAAADu07lzZ8frvXv3NmFJAAAAAABNjR6DAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAABZrVY9//zz6ty5szp37qzevXtr5cqVTV0sAAAAAIALeTV1AQAAAAAATau4uFjjx4/X/PnzJUlhYWH63//+p169ejVtwQAAAAAALkVgEAAAAAA8WF5enh555BGtWbNGkhQbG6sZM2aoQ4cOTVwyAAAAAICrERgEAAAAAA+Vnp6u+++/X9u3b5cktW3bVjNmzFB8fHwTlwwAAAAA4A4EBgEAAADAAyUlJenuu+/WoUOHJEnnnXee3n//fUVERDRxyQAAAAAA7mJs6gIAAAAAABrX/v37NWbMGEdQcPDgwfr0008JCgIAAADAOY7AIAAAAAB4kM2bN+v222/XiRMnJEmXXnqp3nvvPQUGBjZxyQAAAAAA7kZgEAAAAAA8xIoVK/TXv/5VWVlZkqRbbrlF06ZNk4+PTxOXDAAAAADQGAw2m83W1IUAAAAAALhH586dHa+9vLxkNpslSffff78ef/zxpio
WAAAAAKAJ0GMQAAAAADyEl5eX4/Xhw4dVUlLShKUBAAAAADQ2AoMAAAAA4CGmT58uPz8/SdLChQs1btw4goMAAAAA4EEIDAIAAACAhxg8eHClwcGy9KIAAAAAgHMbgUEAAAAA8CCVBQf/8Y9/EBwEAAAAAA9AYBAAAAAAPAw9BwEAAADAMxEYBAAAAAAPdHpwcMGCBQQHAQAAAOAcR2AQAAAAADzU4MGD9e677xIcBAAAAAAPQWAQAAAAADzYkCFDCA4CAAAAgIcgMAgAAAAAHm7IkCF65513CA4CAAAAwDmOwCAAAAAAQEOHDiU4CAAAAADnOIPNZrM1dSEAAAAAAAAAAAAAuBc9BgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAPQGAQAAAAAAAAAAAA8AAEBgEAAAAAAAAAAAAP4OWuDVutVuXm5io7O1vFxcWyWCzu2hUAAAAAAAAAAABwVjOZTPLx8VFISIiCgoJkNLq+f5/BZrPZXL3RnJwcJSYmymazlf64eg8AAAAAAAAAAADAucVgkAwGgwwGg5o3b67g4GDXbt/VgcGyoKDVapXVag8KGgwGt0Q1AQAAAAAAAAAAgHOB1WqVzWaTwSAZjfbYmquDgy4NDFqtVu3bt09Wq1UWi01+fv4KCgqRr6+fDAaDq3YDAAAAAAAAAAAAnFNsNpuKigqVm5utwsICmUz24GCnTp1c1gHPpd34cnNzZbPZZLXag4KRkTHy8/MnKAgAAAAAAAAAAABUw2AwOMXX7Jk5bcrNzXXZPlwaGMzOznaMKRgUFEJAEAAAAAAAAAAAAKgDg8GgoKAQ2Wz2XoTZ2dku27ZLA4PFxcWOMQV9ff1cuWkAAAAAAAAAAADAI5QN02ez2eNvruLSwKDFYrFv1GiktyAAAAAAAAAAAABQDwaDwTGuYFn8zRVcGhgEAAAAAAAAAAAAcGYiMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDAAAAAAAAAAAAgAcgMAgAAAAAAAAAAAB4AAKDHmDjxg0aNKiPBg3qo/ffn97UxWmQss8xdux9TV2URnPddVdq0KA+uu66K5u6KDU6l+paZebOneP4fHPnzmnq4gBnrHP9WFAXtfkuzqbjvLuMHXuf43tqKOofANQOx8tz17nwt+Ucyv24vgWazrlwnPZUtTl20j41nCfGABqbV1MXAGeHuXPn6MUXJ9dpnYkTJ+uqq65xT4HOUFOmTNK8eT9VOd/b21tBQcFq3bq1+vTpp6uuulbx8fGNWMKzS0lJiVasWKYVK5Zpz57dSk9PV2FhgXx9fRUREaHmzVuoY8fO6tGjp/r27afAwKCmLvIZZd++vVqxYpkkafjwkerUqXMTlwioveraHaPRqICAAEVFRatTpy4aOXKUhg0bLi8vTmtqUlhYoP3792n37t3as2e39uzZpSNHDstisUiqW9tts9l07NhR7d69q3Rbu7V37x7l5+dJknr37qt3333fbZ+lIebOnaPjx5MkSffd96Db9zd27H3avHljndd79dU3NHz4SDeUSFqxYpn27dsrSRoz5nYFBwe
7ZT+ukJSUpOuvv6rK+QEBgQoPD1fHjp104YUj9Kc/XSxfX99GLCHgebZs2awHH7xHktSlS1d9/PHn1S5/8uQJXXPN5Y73d911jx588OFq1/nxx+/1yisvSJIuvvhSvfDCKw0s9Znl/fen68MP36t0XkBAgAIDAxUUFKy2bdupc+cu6t27r3r06NnIpaza7NmfKycnR8HBwRoz5vamLs4Zp+xGf7Nm8R53XwSAe9EGN1x1bXBV3n77PfXt288l++d+Hc4E3EEDGlFJSYkyMtKVkZGuLVs267PPPtEDDzys22+/o6mLdsbZuXOHpkx5TkeOHK4wLz8/X/n5+UpISNDatb9LksLDIzR//uJGLuWZbd++vY4TnWbN4jnRwDnDarUqNzdXubm5Onz4kBYunK9Onbro5ZdfVYsWLZu6eGe0a665QtnZWS7Z1v/935uaNeszl2yrsf3880+OQF1jBAbPRCtWLHc8zHTlldec0YHBmuTn5yk/P0+JiQlavnypZsx4Xy+++Iq6dOnW1EUDzlndu58nPz8/FRYWav/+fcrNzVFQUNXHkY0b1zu937Sp5oclNm7c4HjtqhtxZ4uy652UlBQdOnRQS5far3Nat26jMWNu03XX3SCDwdCkZZw9+wslJx9XXFwzAoOVKLsO6927L4FBAC5FG3z2434dzgQEBlFnffv2080331rjcp07d3H5vn//fZPLt+kuN900Rv369XeaVlxcrKSkRK1YsUy7du1UcXGx3nrrTQUGBuq6665vopKeefbs2aVHH31Q+fn5kqSoqCiNGPEndejQUSEhISoqKtTJkye1Z89ubdiwTjk5ObJarU1cagDucnq7Y7ValZWVpe3bt2rx4oUqKirSvn179Le/PaTPPvtSAQEBTVjauvvhh58bbV9Wq8XpfVxcnEpKzEpLS23wtgICAhUbG6tDhw42qIwN1bdvvzP6fOH++x9S+/bta7Vs9+7nubk0Z5/w8HA99dREp2k5OTnauXOHFiyYV/rg0DH97W8P66OPZqp58xZNVFLg3Obt7a0ePXpp3brfZbFYtHnzJg0bNrzK5U+/Cblr1w4VFhbIz8+/ynXK97Tu06d/lcudCy666BJdfPGljvclJSXKzc1Ramqqdu3aqS1bNis/P09HjhzWq6++rMWLF2rKlFcUGRlZ6fbO9LawNs6FzwAA7kAb7Fqnt8FVKbuGu+qqa3jgA+cEAoOos9jYZm5La3Uu6dy5S5Xf01/+cremT39bH3/8oSR7F/ZrrrlORiPDfkrSK6+86AgKXnHF1XryyQlVpgQzm81av36tlixZ1JhFBNCIqmp3rrvuev35z3/RQw/dp4yMDCUlJerrr2frL3+5uwlKeXa48MIRatWqtbp06aouXboqLCy8xjTYVWnbtp3GjLldXbt2U5cuXdWqVWtt2rRRDz98vxtKfu7o2bMXT902gK+vX6XHg6uuukZ33nmXHnrofh0/nqTs7CxNn/72OZf2CDiT9OnTV+vW2bN3bNq0sVY3JYcMuUBr1qyS2WzW1q1bNXDgoEqXP3r0qFJSTkqSoqNj1KpVKxeX/szSunWbaq+x8/Jy9cMP3+m996arqKhQGzdu0BNP/F3vvvt+tTd2AQDnJtpg16mpDQbOVUQhgCZy770PKDQ0TJKUlpZaacpMT3To0EHt3btHkhQbG6cJEyZWO06Ql5eXBg8eqokTJzdSCQGcSdq2bae77rrX8f7XX5c3XWHOAs89N0V33XWPBg0aorCw8AZt67rrbtBjjz2uSy+9XK1bt2nylGZAs2bx+uc/n3a8X7lyhYqLi5uwRMC5rfxDDuVTjp3uxIlkJSYmSJKuuOIqxcU1kyRt2rS+ynU2bSKFWXmBgUG6/fY79cEHHyskJESStHv3Lr311rSmLRgAoEnQBgNoKHoMwiE9PV3jxj2qPXt2S5KuvXa0xo+fIJPJ1OBt7927R7/9tlrbtm3VoUMHlZ6eLqvVopCQULVv30FDh16gq6+
+rsb0b4MG9ZFkz9P/7rvvV7pMWlqqvv/+W61d+7uOHj2i3Nxc+fn5KSwsTOHh4erUqYsuuGCYBg0aUmkPvYSEY/r++2+1adMGJSQcU35+gQIDAxQSEqqoqCh16dJNw4ePUO/efRv0nXh5ealVq1bavj1TkpSXl9eg7UnShg3rNG/ez9q2bbPS0tJks9kUFRWlHj166fLLr1T//gNrva01a1Zr6dJF2r59m9LSUlVQUKjg4CC1bt1GvXr11p/+dEm9c2BXV9cOHz7kWO68886Xl5d3vfZRmaKiIs2b95NWrfpV+/fvV2Zmhry9fRQbG6u+ffvrpptuUatWrV22v5UrV2jp0sXavn2r0tPTZbFYFRERoR49eurKK6/RgAG1+3sUFxdr/vyftWbNKu3du0eZmRmyWCwKD49Q+/Yd1K9ff11yyeWKjo6WJM2dO0cvvjjZaRsvvji5wjTp7ErPi3OPq9qdPn1OHY+PHTta5XI2m03btm3V2rW/afv2bTp8+JCysjJlMBgUEhJa2tN7hC677Ep5e9fu2PP772v0/fffaseO7crJyVZ4eIS6dOmq66+/UQMHDq7VNq677krHGD2npxX9/POZeuutNyVJvXv30X//+78qv5+jR4/orrtuV35+vvz8/PTRR5+pbdt2tSrDmcZqtWrevJ80f/7POnTokHJzcxQREaE+ffrr5ptvqXbsuI0bNzh6Ld5zz/1OYwiOHXufU1oc6dS5RXlXXHG1nnvueadp+/fv048/fqctWzbr+PHjKiwsVFBQkMLCwhQdHa3u3c/TyJEXqUuXrg356NV64onHtGrVr5Ls2QfGjn2kymUXLvxFzz03QZI9gP7RR5/Jz8+v0h6i119/VYX1Tz/XKv/d/f77JlmtVv3yyzwtXDhfBw4cKG3nzE7titls1saNG7Ru3e/atWuHjh49qqysTHl5eSksLFzdunXXRRddohEjRrksa8KAAQPl6+uroqIiFRYWKiHhmNq1s6f9ycvL05o1q7Rx43rt3btHiYkJysvLl7+/n6KiotWjRy9de+3oWqVwPVvONwFXqKq97tq1uwICApSfn68//tinnJycSscrLX+DsU+ffurTp6/mzZtb7RhH5eedflMyMzNDq1b9qk2bNmr//n06fjxJBQWFCgwMUGxsnHr37qvRo2+osQ2s63EtKSnJcbwsaydOnjypb775UqtW/ark5GRJUosWLTR8+EiNGXObAgODqi1DXXXs2EkTJ07W+PHjJElz5vygO+/8q2Jj45yWq64tLK8+bVvZeUuZ5OTjlbalp+/39Gv5nJwcff/9t1qxYqmSkhKVmZmpXr36ONqe2n6G8oqKivTDD99q0aIFSkg4poKCAkVHR2vAgMEaM+b2anu9lG8fv/turuLj46tctvz11sSJk51Syp3+XWzevLHS7+f09cpYLBYtWrRAv/66XLt27VRmZoYMBoOio2PUq1cfjR59g7p2rd0YuosWLdDcuT9q3769ysvLU2RklHr16q0bbrhZ5513fq22AaBp0QafOW1wmeragLqsW6au9+vS0lL1ww/fae3a35WQcEzZ2VkKDAxSq1atNHjwBbrxxpsdDxFVpj7XdWX27duruXPnaNOmDTp58oQKCgoUGhqqTp26aOTIUbr88qvk5VVzuOnQoYOaPftzrVu3VmlpqQoMDFKbNm106aVX6Oqrr3VJLAI1IzAISfabE4899rASEuxPkdxzz326776xLtn2hx++p/ffn17pvLS0VKWlpWrdut/12WefaOrUNxo0ns6aNas1ceJTys93DrLl5eUqLy9XiYkJ2rFju7777mv98suSCr0lfvrpB7322tQKT5dnZ2crOztbCQnHtGXLZv3443datmx1vctZJjMz0/E6Li6u6gVrUFhYoMmTn9Xy5UsrzEtISFBCQoLmzZurESNGafLkF6pNN5OcfFzPPvu0tm/fVml5MzO3aOvWLfrkk4+0ePGKagc4rkxNdc1iOTVmVXp
6ep22XZ1NmzZq0qRnHOkQyhQXF+vgwVwdPHhA3333te6/f2yD0xCeOJGsiROfqvQ7PH48ScePJ2nBgvkaOfJPmjRpSrV/j40b12vy5GcrlFuSTp48oZMnT+i331ZrwYL5+vTTWQ0qN9CYXNnu+Pj4OF4XFRVVudxLLz2vuXPnVDovJeWkUlJOatWqX/XZZ5/qtdemVXvzyGq1aurUlzRnzvdO00+cSNaJE8lasWKZbrnlVl14YcNSktx225+1fv1a/f77Gm3evEkfffSB7r33gQrLlZSUaOLEpx1pmMeNG3/WBgXz8nI1fvy4Ck++Jicna968n7RgwTzdd9+DuuuuexqtTDNmvK8PPvhfhfFss7IylZWVqSNHDmvDhvVaufJXffHF124rx7PPTtaf/zxGKSknNXPmxxowYKD69q045kdCwjFNnfqSJMnX11cvvjhVfn5+LitHdna2nnxynDZvrv7hkr///aFKn2A2m81KTj6u5OTjWrp0sc4/v6emTn29yrGy6sLLy0shISFKSUmRJOXm5kqy/49cccVFlR4jcnNzlZubq8OHD2nOnO81evQNevzxJ6u8qD3bzjeBhqipve7Ro5d+/32NrFarNm/eqAsvHFFhG2U3GNu0aauIiAj17t1H8+bN1a5du1RQUCB//4rnwuUf4ih/nEtMTNDNN18vi8VcYZ2y/6H9+/fp669n6777HtTdd99Xq89Z2+NaeRs3btCECeOVlZXpNH3fvr3at2+vfvjhW73++n/UuXOXWm+zNi68cIS6dOmqPXt2q6SkRPPn/1yvNrEp27a9e/foySfHOW7kusLJkyf0+ON/1/79+5ym26+Hv9bPP8/R+PETdOWVV7tsn6524MAfmjBhfKXZhI4ePaKjR49ozpzvdeONt+gf/3iiyhunhYWFeuaZ8Vq9epXT9LJr0YULf9FDDz3qyGAE4MxEG1y1pmqDm9qXX87Su+++pcLCQqfpWVmZ2r49U9u3b9Ps2Z9pypSXNWjQkBq3V9vvvri4WK+99ormzp0jm83mNC81NVWpqau0Zs0qzZr1uV577c1qx3n/8cfv9frrU1VSUlJu++nKyEjX5s2btGDBPL322ps1lh0NR2AQ2r17l8aN+5syMtJlMpn0xBNPavToG122/cLCQplMJnXr1l09evRUy5atFRwcLKvVquPHk7Rq1Upt27ZFKSkp+sc/HtXMmbMqPPFYGykpKZo48UnHTdHevftq6NALFBkZKW9vH2VmZurgwQPasGGdjh49UmH9vXv3aOrUl2SxWGQymTRw4GANGDBQ4eERMhqNSk9P1x9/7NO6dWuVnZ3V4O9l48b1jp4t7dq1V1RUdL22Y7FY9I9//M3RcAcEBOiKK65Wt27dZTAYtGvXTv388xzl5+dr+fKlGjcuW2+99W6lFxGJiQm69967lJFhD8iFhITq4osvUZcuXRUYGKTs7Czt379Pq1evUnLycZ3WFtSoNnWtRYuWjtfbt2/Vzp07GhQsluw38MaP/4fMZrOMRqMGDRqi/v0HKDo6RsXFxdq9e5fmz5+r3NxcvfvufyWp3sHBEyeSdc89dyo1NVWS1KmTvQdSixYtZTQadeTIYc2f/7MSExO0bNkSFRQU6M0336o0Bd+KFcs0YcKTjpOuVq1aa9Soi9S6dWt5e/soLS1VO3fu1Jo1K50a5n79+uvVV9/Qhg3r9fXXsyVJN900Rv36ndsDRuPs4ep25+DBA47X1bUfhYWF8vb2Vs+evdS9+3lq0aKlAgODVFxcrISEY1q+fKn++GO/jhw5rHHjHtEnn3xR5VOG06a94QgKmkwmXXrp5erTp6+8vX20b99e/fTTj/ryy1k6ceJEvT+XJBkMBj333BTdcccYpaWl6qOPPlC/fv3Vq5fzk+dvvTVN+/bZ0zBfdNEluuaa6xq036b0wguTtXHjBrVo0VJXXHG1WrZsqdzcHK1atVKrV6+UxWLR9OlvKyAgUDffPKZO237ggYeUlZW
p//3vHUe9efXVNyosV74e/frrCr333ruS7EG2Cy64UD179lZ4eLisVqtSU1O1b98erVu3tgGfunZCQ8M0efKLevTRB2W1WjV58kTNnDnbKfBkNpfo2WefdgSt/v73x9W+fQfH/JtvHqPhw0foq69mOYJ2Tz31jMLDIyrsqyqTJj2jzZs3qV279rr44kvVokVL5eXlVeiNWVhYKH9/f/Xp01ddunRVs2bNFRAQqMLCAh0+fEhLly5SQkKCtm/fqqeeelzvvvtBrZ4wrY7ZbFZ2do7jfVCQ/X/YarWqqKhIERGR6tdvgDp27KioqGj5+vopJydbe/bs0pIli5Sdna3vv/9WAQGBevTRxyps/2w73wQaojbtdZ8+/fT772sk2XslVHdTsqwHbNlvi8WsrVs3V7hxVX5so7i4ZoqPb+6YV1JSIovFrLi4OPXrN0Dt23dQRESkvL29lZGRoZ07t2vJksUqKirUe++9q5CQUN144801ftbaHtfKnDhxXE8//U9lZ2dp6NBhuuCCYQoKCtaxY8c0b95PSkg4ppSUFD366Fh98snnatas6t5n9XHppZc7eo9s2rSxzoHBhrRtTz89UYWFhZo69UVlZGQoPDxcTz01scJyrVu3qXTf2dlZGj9+nE6cSNaAAYN0wQUXKiIiQmlpaY7r0LqyWMyaMGG89u/fp06dOuvSSy9XbGyc0tPTtWzZYm3evElFRUV66aXnFRwcXGk9dZWy84onn3xckv1a/4EHHqqw3Ok3q/fu3aOxY+9ztN+9evXWkCHD1KxZM1mtVv3xx379/PNPSk9P0zfffCmzuaTS712SnntugiMo6Ovrp6uvvsZxTb1jx3bNnfuT3nprGmNqAWcw2uAztw1uiIbcr5s+/W19/PGHkiR/f3+NHHmRzj//fIWGhik7O0vr16/T8uVLlZ2drccff0xvvz29wn2D09XmuzebzXrssUccvU+jo6N10UWXqkOHjvLz89PJkye0fPlSbd26RQcPHtCDD96rTz+dpfDwikOXLFu2RFOnvui4h9mvX3+NGPEnhYaGKikpUfPmzdXmzZv04ovPV1gXrtekgUGr1abcgpKaFzzHBPl7y2g8M8bh+f33NZowYbzy8/Pl6+urF154xeUnySNH/kk333yrI83h6e6886+aP/9nvfDCJGVnZ+mDD97TM888V+f9LFw436mnRHU3C3fs2C5/f+e0pT/99IOjt9rUqa9XOXCvzWbTli31S79YXFys48eTtGLFcsfB3NvbW3//+7h6bU+SZs36zHHAbtYsXm+//T+nhvvyy6/Urbferoceul/Jyce1adMGffHFTN1xx11O27FarXr66X86LsaGDRuuSZOmVNoj8J//lNasWeXUS6cmta1rnTt3UZs2bXX48CGZzWY9+uiDGj36Ro0cOUpdunStc2rR1NQUTZ48UWazWeHhEXrttX/rvPN6OC1zxRVX6Y477tI//vGIDhz4Q++9966GDx+pNm3a1mlfNptNEyc+pdTUVJlMJv3zn0/ruuuur7DcnXf+VS+8MEmLFi3Q77+v0Zw5P+jaa0c7LXP8eJKef/45R1CwrHdMZQHdoqIibdiwzvE+Lq6Z4uKaKSfn1I1Re4pELvwkyWqzKq8kv6mL0egCvQNkNDT90MKubneKi4s1c+Ynjvc9e/aqctkbbrhJ48dPqDKtxj333K+ZMz/WO++8pYSEBH355Wzdffe9FZbbunWL4yTe399fb775X/Xq1dsx/9JLL9ett96uhx9+oNKe3HUVERGhSZNe0N///pAsFosmTbIHg8o+x8qVK/TVV/Yew/HxzfXUU880eJ9NafnypRo+fKSmTHnZaYzZ0aNv1MKFv+j555+VxWLR22//ny64YJhTm1eTsr/T7NlfOKbVdGz88cdvJUkmk5f+978ZVaYKtVgs2rFje63LUl99+/bTXXfdoxkz3ldKSopeeGGy3njjP47577zzX+3evUuSNGLEKF1/vfMNhC5duqp
Ll65asWK5Y9qAAYOrTZl2ut9+W60bb7xZ//jHP53apdPbvAceeFg9evSosmf8/feP1bRpr+ubb77S9u3btGjRAl1++ZW1Lkdl1q9fq6Ii+xO0vr5+jgeOvLy89O9/v6VBgwZXmtrz2mtH68EHH9YTTzxW+qTt57rxxpsr3Eg4G843zxVWq01FHnit6HuGXCvWtr2uaYyj8mMblaX+btGipaKjY5SSclIbN26ocFOyurGNwsMjNH36h07tbnk33HCT7r33QT322MM6evSIpk//r6644qoah6yo7XGt/Gc1mUyaMuVlXXLJZU7zbr/9Dj333AStWLFM2dlZevXVlzRt2tvV7r+uzj//1PXM7t0767x+Q9q2slTpb775uiT7sbYu1xkHDvwhk8mkyZNf1GWXXVHnslcmJSVFKSkplfaku/nmMZo582O9/fb/yWq16pVXXlTfvv3clmLu9O8iNDSsxu+nsLCg9P8tT35+fnrhhVcqtA+XXnq5/vKXv+rJJx/Xxo0b9MMP32nUqIsrDE+xcOEvjjG3IyOj9Pbb/3O6rr388qt044236OGHH9CKFcsa8ElxLrNZbbIVVewVdq4z+HrJQBtc6fYl2mBXqO/9ut9+W61PPpkhyT7s0ssvv6aYmBinZa677gZt3bpF//jHo8rPz9Pzzz+nr7/+odoHL2vz3b/33ruOenHttaM1btx4p+t0Sbr11j/ryy9n6c03X1NKyklNm/a6nn/+JadlcnNz9K9/vewICj722OMaM+Z2p2Vuu+0OTZ48UUuWLKr2+4BrNFlgcN2uE/p0wV5l5xXXvPA5JiTQR3de2lkDusU2aTnmz5+rl16aIrPZrJCQEP3rX29WeXAvb968nyqMTXO68v/c3bp1r3Gbl19+pdau/U2//DJPixb9oieffLrOAaCEhGOO19dcc221y1aWT79s/fDw8Cpv0kj23hu1Ge+lqhzRZXx8fDR06AX661/vrRCoqi2zuUSzZn3uKNcLL7xS6Q3S+PjmevHFV3TffX+VzWbTrFmfa8yY253G0FqyZJH27dsryd7L7eWX/1XtGFtDhlxQ63LWpa4ZDAZNnDhJjzzyoAoLC5Wfn6/PP/9Un3/+qby9vdWhQ0d16dJV55/fU/36DajQEJ7us88+dTxx/8or/6ryu46JidFLL72q22+/WRaLRV9+OUtPPjmh1p9Rklau/NWRPvSeex6o8iTGx8dHzz03RTt2bNfx40n64ouZFQKDn376seNp0RtuuEn33HN/lfv19fXV0KHD6lRWT7Uxeatm7fleOcW5TV2URhfsE6Rbu4xW37ieTVaG+rY7p7NarcrOztK2bdv08ccfateuHZLsN/5vv/3OKter6dhtMBh0551/1erVK7V16xbNm/dTpYHBL76Y6TiZffjhv1X6GaKiovXii1N11123O6VIrq8BAwbqjjvu0qeffqQTJ5L10kvP69VX39DJkycdT9OZTF6aMuXlOqd4PtPExsZp8uQXK1xsSNIll1ymXbt2avbsz1VUVKivvpqtxx573K3lKUvd07lz52rHDzSZTNUGpsuUjZdUk8rGnCxzzz33a+PG9dq6dYtWr16pL7/8Qrfccpt++221Zs36rHT9OE2YUPcHrWqjc+cuGjdufI3jAtY0lq6Xl5cee+wJrV69SsePJ2nevLkNCgyeOJGsN9541fF+2LALHQ8xmUwmDRkytNr1Q0PD9NxzU3TTTdfJYrHol1/m6a9/dT4GnGnnm+eq/btOasWCfSrI87zAoH+gt4Zf2kkdu1V/jutOdWmvyzKL5OXl6o8/9isrK0uhoaGO+c5jG52q071799HChb84zT+1TuUpzCQpNDS0xnOH+Ph4/fOfT+nRR8cqNzdXv/66vMYAVG2Pa+XdfPOtFW5ISvZz88mTX9SYMTfoxIlk/f77b9q/f586duxU623XJC6umeN1Tk6OzOaSOl0/u7ptq6sbb7zZZUHBMl27dtO4cf+s9G94xx13ace
O7VqxYpkyMtI1d+5PuuWWW126/4b48ccfHDfvn3zymSrbh6CgYL388r90/fXXKC8vV198MbNCW/vZZ586Xk+Y8GylD7u2bdtOzzzznB5//O8u/BQ4VxQdylDumgTZCj0wMOjnpaAhLeTbtmJPp8ZCG1w7rmyDP/zwPX344XvV7u/VV99o0oftp09/WzabTeHh4Xrjjf9z+juX17NnL/3tb//Q1Kkv6vjxJC1btkQXX3xpldut6btPT0/Xl1/aH6rt33+gnn762Sq3dcstt2rnzh1auHC+Fi9eqIcf/rvTPduff56rjIwMSdKoURdVCApK9s4zzz47WTt3bndpunFUrsm6Dsz4ebdHBgUlKTuvWDN+3t2kZfj00481Zcokmc1mxcTE6n//m1Gvm7OuVHbBUVhYqD/+2F/n9cs/jV6WVqU+62dlZSkpKanO69eVyWSSr69fg1Jmbdu2TWlp9pSVffr0rXYA8fPO6+F42ic9PU3btm1xmv/LL6duPt533wPVBgXroj517bzzemjGjJkVnk4qKSnR7t279P3332rKlOd03XVX6NFHx2rr1i2Vbsdms2n+fPvnOv/8HjV2oW/Tpq0jkL127W+1/ISnlAXMfXx8dMst1ae38/b21iWX2BvnI0cOKzn5uGOexWLRwoW/OLZ1770P1rksqNzMXV97ZFBQknKKczVzl/vGPqtJQ9qdefN+0qBBfRw/Q4b002WX/Unjx//DERT08fHRyy+/VueevpXp2dNeroSEYxXGLCguLtaaNfbUTEFBQbrmmtGnr+7QsWMnDRgwqMHlKXP//WMdx/kVK5bpq69ma/LkiY4yPvDA2GrbgbPFDTfcXOlYF2Vuu+0Ox4XLsmVL3F6esrH5EhISnJ7sbEomk0nPP/+So9fof//7H61Zs0pTpkySzWYr7YnxUrWDzjfEjTfeXKcL9+p4eXk56u2uXTsrjFlxuqKiQq1Ysczp5+eff9Jrr72i22672XGzOzg4WA8++HCdy9OyZStFRkZJUqU9QM+2882z1dKf93pkUFCSCvJKtPTnvU22/7q21yaTSb169ZJkP/c+Pe1XWQ+G1q3bOP63JPtNSUnas2eP8vKcx+vcvLnyG5l1UdaWS5X/L5+ursc1o9Go2267o8r5/v7+uuGGmxzvXd1eBQc7H9+zsrLrtH5Tt2033+z6oFz584PK3HHHXxyvly1b7PL9N0TZdWR0dIwuvfTyapcNDQ3T0KH2h3Q3b97oNF5tUlKSI7V869Ztqn14dOjQYS45b8a5J3fVMY8MCkqSrdCs3FXHal7QTWiDa6ep2+DG9scf+7V3r/3Yfs0111UZFCxz6aWXy2Sy32uu6d5mTd/94sULHWO0//nPVT+EXebKK6+SZL+vWT6zmSSnbEq33fbnKrfh5+evG26oOQUtGo4xBj2MzWbTv//9miPlWLt27TVt2n8VE1P73ot9+/ar8US+/Fg2Zfv97bfVWrp0sfbs2a2TJ08oLy+/0gFrJenkyZPq0qVbrcskSQMHDnI8Jf/UU0/ojjvu0qhRF9U6l/TAgYO0fPlSWa1WPfTQffrLX/6qCy8cqcjIyDqVo0xlOaItFovS09O1fftWLV++VEuXLtaKFcv1xBPj6zW+1s6dpxrYspQu1Rk4cLA2bFgvyd44l3/6Z8uWLZLsT9fUZoDamjS0rrVr115vv/2eDh48oGXLlmjLls3avXuncnNPBXWsVqvWr1+rDRvW6b77xlbo2XPo0EHHDfPg4JBapUopaxCTkhJVVFRUaY+VqpSl/IqIiHB8z9UpPwbSoUMHHU/+/vHHfuXl2T/n+ef3qDQvN3C2cEW7U5MLLxyhJ554slbbNJvNWr58qX79dbn27dur1NQU5efny2q1Vrr8yZMnncZa279/n2OQ7B49etWYUrlfvwH67bfVtf8w1fDy8tKUKa/ozjvHKDc3V//+978c8/r3H1ghRfTZasCAAdXOj4mJUZs2bXXw4AGdOJGstLRUpwt
dVxs4cLD27t2j7OwsPfjgPfrzn/+iCy64UMHB9euZef/9D6l9+/Y1Lld207YqcXHN9PTTz+rpp/+pkpISjRv3N8e8e+65360PfPXsWf2DNuUVFhZo8eKFWrnyVx048IfS09NVUJBfaQAwLy9XeXm51fZ6zcjIcIzbVJX4+OZ64YVXnMYtLpOSkqL583/Whg3rdOjQQeXkZKuwsLDS7ZSNr1LemXa+CbhKQ9rrPn36OcYz27Rpo0aMGOWYV9bz4PSbi85jHG1x9Og9evSIUlJSJEnNm7dw6hlX3rFjRzV//s/avHmTjh49otzcXEca4dNV9r98uroc1yR7j6uqhsoo07//QElvSZLjQSZXOf0YWtl45dVxddtWF9HR0WrevIXLt1tTL/Xu3c9XQECg8vPztGfPblmtVpc95NIQeXm52r9/nyQpKipKK1euqHGdsmBgUVGRkpISHQG+8vWsNuNV9es3QIcPH6pPsQG4EG1w07bBF110SbW96iQ5xmltCuWHF7BYrLW6txkQ4K+cnBwdOnSw2uVq+u7L7zs9Pa3GfZf/e5fft81m0549u0rLFqBu3ar/Pvv3r/6eAFyjyQKDd1/Z1eNTiTaFr76a5Qis9OjRS6+/Pq3OT5PHxjarU/fptLRUPf30+Ao91KpTFhSpi0GDhuiKK67SvHlzlZmZqbfemqa33pqm+PjmOu+8Hurdu7eGDLlAsbFxla5/9dXXasmSxdqwYZ2Sk4/r1Vdf1quvvqzWrduoR4+e6tWrj4YOvUBhYbUL0lSXI/rGG2/WkSOH9fDD9ys1NVWvvTZV7dp1qHOalrLegpL9CfeatGrV2vE6NfXUunl5eY7vvHnzFi7pLeiKuibZT4jatbPfQLXZbEpMTNCOHdu1Zs1qLVu2WCUlJbLZbHrvvXfUvHlzpycsjx8/9ST+mjWrHL18ais7O7vGk40yBQUFyszMlCQlJyfXeNOysn2VOXnyhON127bt6rQdVO+Objd5fCrRxuaKY8HpD6RkZ2fp8OHD+vnnOcrMzNTq1SvVs2evatOISvbeuU899USNJ8flnd4epaamOF5XFnQ4XcuWNS9TF/Hx8XrqqYmaOPEpx7Tw8AhNnvxCnW8Knqlq0561bNlKBw8ekGQP9LgzMHjnnXdp9eqVOnDgDx048Ieef/5ZGY1GdejQsfT8oo+GDBla67GKevbsVaFHfH2NHPknXX/9Tfruu1O9gfv0sY9B6E41pfEus23bVj377NM6caL2aWDy8vLqnA7X399fYWHh6tSps4YNu1AXXXRppYHV77//Rv/5z7+rDARWVpbTnWnnm+eqUVd29vhUoo2tIe218xhHpx6OS04+rqSkREn2Y1N5bdq0VUREpNLT07Rx43rHTUnntGeVHyvff3+6Pv54RpUPmZ6uNteWtT2ulanrOUDZjVZXOb2XX13PrVzdttVFdLTrh1QJCQlxepCrMgaDQS1atNC+fXtVWFionJycGntdNIYTJ044HlDbvXtXg64jm/o8FeeGoAtaenwq0cZGG9y0bXDr1m2aNE1oTcrf2/zss0/qtG75NqIyNX335ff9/PN1G6aibFgnScrNzVVBQYEk+0OcNT2YU5u/MRquyQKDA7rFql+XGOV64IDyQU04oHz5cY4KCwtls1XeQ8JVzGazHnvsEccTcCEhIbrgggvVvn0HRUREytfXTyaT/WCwYcN6ff317NJy1q9czz77vPr27a9Zsz5zpCNNSkpUUlKiFi6cL4PBoMGDh+rvfx+n1q3bOK3r5eWtadPe0jfffKVvvvnSkYrqyJHDOnLksH766UeZTF76058u1t/+9piiomoXMKpK69Zt9OCDj+jFFyfLarVqxoz39J//vFOnbeTl5TteV5d27dQypwb8LRu/zr6dvHLL1Lyd2nBHXbNfzLVUixYtddllVygp6SE99tgjOnr0iCT7CUr5wGBD0+KU9QqqDVfuy/nvUf0gzaibvnE91Tv2fOWV5Ne88Dkm0Dt
ARkPjPxXtimNBVQ+k3H33fXr88b9py5bNeuutaQoLC9eVV15d6TZyc3P0yCMPOC4KoqOjNWTIBY6LIh8fH8fJ6aJFC7R48cLS8juXNz//VN2pqUeXfRnXHFPLa9GihUwmL8fFWK9evd0aGGtstfnOyn/35f8m7hAUFKwPPvhEn332iebM+V4pKSmyWq3at2+v9u3bq++++1q+vr66+upr9eCDDzf6GI+tW7d2ej9y5Ci394CoTd1PSkrUY4897Pj7tGjRUoMGDVGrVq0VFhYmHx8fRzD7q69mOVId1XQOWN3Yi9VZsmSRXn31Zcf788/vod69+yo+Pl6BgUFOvX+nTn1RGRkZVY4Pejadb56tOnaLUfsu0SrywGtF3ya6VmxIe92pUxcFBwcrJydHBw8eUFZWpkJDw5zGKSpLW1Ze7959tGTJIqfUZ85jG1W8KfnZZ584xgEyGo3q06efevbsqdjYZgoICHB6wLEswFKba8vaHNfKq801U/n2zNVtVfkbdSEhIXUenqIp27a6ZGSprdqebzn/TfLOiMCgK68j8/MLHK+b6jwVZz/ftuHyaR0mW5EHBgZ9vWSgDZZEG3wmycmp/4PtNd3XrOm7b0gbVVJy6hhSUND091FQUZOmEjUaDQoJrD4FF1zr5ptv1bFjR7V06WLt27dHDz/8gN56a7rbUhUuXrzQERTs12+AXn31DQUGBla6bG26l9fEYDDoyiuv1pVXXq3jx5O0desW7dixTZs2bdTBgwdks9m0Zs0qbdmyWe+9N0MdOnR0Wt/Ly1tjxtyuMWNu15Ejh7Vt2xZt27ZNmzZtUGJigiwWsxYunK/NmzdoxozPat2brCqDB59K2bl586Y6DxofGHgqaFT25EV1yh+IAwJO/R3K/01qs53aaIy6Fh/fXM8+O1n33fdXSfbxwJKSkhQfb0/nFRBw6vu59dY/6+9/H+eyfZ8uIOBUo9W5cxd98skX9d6W89/j7D6BORMZDUYF+7j+6WdUzp3HgsDAQL300r906603KDs7W6+/PlX9+vWvtKfO119/6QgKXnrp5Zo4cXKVvaOr6+Fe/rhSm15HhYWuOaaWyc/P18SJTzs9obls2RItWrSgxvQnZ4vCwoIaeyiU/+7L/03cxd/fX/fd96DuvfcB/fHHfm3btlXbt2/Vhg3rlJqaqqKiIn3zzVfavHmTPvjgE5c9ZFOT/fv36e23/89p2rvv/lcDBw5Rq1Y197x0p48/nuG4CL/jjrv00EOPVtmrdcGC+W4vz7vv/leSfSyWqVNf17Bhw6tc9pVXXqh2W2fb+ebZymg0yJ9rxUbTkPbaaDSqZ8/eWrXqV9lsNm3cuFGjRv3J0fOgVavWlQa5y25K7t27R3l5uQoMDHK6QVl+2APJnjZxxoz3JdmP/W+9Nb3K1F6uuqapSm22X/4cwNVt1fbt2xyva0rHVZUztW2rj9qebzn/TSq/L1EbVaWgr4/ydWPEiFGaOvX1Bmzr1N+oKc5Tce4wGA0y+Dc8kxRqhza4bpq6DW5s5Y/tr732ZrXXMa7f96nvbsmSlVXe069J+U4PtE9njqZPqI5GZR+f6GVdcsllkuxjmT388P1KT093y/7Wrfvd8fof/3ii2gNIUlJSlfPqo1mzeF122RV64omn9MUXX2vWrG8cebTz8/McN4iq0rp1G1199XV65pnn9O23c/Thh586xk5MSUnRJ5/MaHAZyz+hWFxc7EhFWVvle4gcO1bzAMllPeskOd1kCgwMVFCQ/SZsYmJCnXrKVaWx6tp55/VwaqjS0k6lCCjfJb58ek53CAoKdpTj5MmGBbnL55GvS8pD4Ezk7mNBZGSk7rnnfkn2C4Tp09+udLmy9shk8tLjjz9Zbcrk6tqj6OhTx5WEhJqPu7U5NtfFv/71smO/Q4cOc/Rymjr1JUeqmLNdbb6z8t99YwZNDAaDOnbspBtuuEmTJ7+on35aoP/85x1HMPrAgT/03XffNEpZCgoKNHHiU45
xhkaNukiSPXj83HNPu6Qtb4h16+wD3YeHR+jBBx+uNtWtu+tuUlKio85ceOGIai+m8/Jya0y5U97ZcL4J1EZD2+vyPQs2bVpf+rvysY3K9Opl78FgsVgc4xSVPcTTqlXrCsf3HTu2OR44uO66G6od78fdx5XanAO4s60q/0BFVd9vbZ1JbVt9ZWdnKysrq9pl7MNS2OuFn59fhfEUfXxOnRuazdW3oXW9bq9O+brR0GvWpj5PBVA/tMF109RtcGMrn4L7xAn33tusuG/X3FcNCgpyPGCUlJRY6Xjz5dXmb4yGIzDogby8vDRp0gu6/PIrJUkHDx7QQw/d55SP3lXKj4FXU37g339f4/L9l9e2bTtNnfqaI73W1q2b67R+9+7nadKkU0+Q13X9ypx+QVHXpzC7dz/f8brsBlx11q49tczpjXjPnr0l2Z8CctXfojHqmsFgkMlkcrwv/xRKx46dHQHPjRvXO26euktZeoaMjHTHoLr10aFDR0e5t2/fpoyMjHptxzmVXPWNLuBO7j4WjB59o6Ki7A9K/PLLPB048EeFZdLS0iTZH8iobryGoqIipxQqp+vQoaMjGLdt25YajysbNqyrsfy1NX/+XP3yyzxJ9vFXX3rpVT388N8l2YMZzz77tMzmsz/lz7p1a6udn5KS4nhoIi4url5pVMun6avpoqQ6BoNBAwcO0rhx/3RMc8X5QW38+9//0pEjhyXZx617+eV/acSIUZKkPXt265133qp2fedUha5vI8r+5+Lj453a6dOlpqZo//79Lt+/c1nqcj76W4N6gpyJ55tAbTWkvS7fs2DTpo1OYxuVBctP1759B8e4cJs2bXQa26iyFGZlxxWp5v/l335bXWOZG+LQoYM1jlm0bt2pc4DqbqDW1YoVy7Rv3x5Jko+Pjy6//CqXbVuqfdt26lrjzLjOKP9QcmV27tzhGOuqa9duFdJuBwefOj+sKZvR9u1bayzPqQdiqv9+wsLC1a5de0nS3r17nOp5XXXr1t3xesOG9dUsWbaM685TATQMbXDtNWUb7Cp1uV9XPrjr7u+2un2vWVP/fRsMBnXtam+j8vPztXPnjmqXX7+e9qkxEBj0UCaTSc8++7yuuuoaSdLhw4f00EP3N7in0+nK5wSuLtq/aNECHTx4wKX7rkxoaJij12JV48ZUpyxFZX3XP135g2p0dEydB3fv0aOH42b4xo0bqj2w7ty5wzF+T2RklHr06OU0/4orTl1Qvv/+dJf1NKhrXcvJyanTvjdt2ujIee3r66cWLU4NFG0ymRxjDmZmZmrWrM/q+zFq5YorTo1tNn36O/W+2Vy+3MXFxfrgg+n12k75QLO7UzkANXFnu+Pj46PbbrtTkj3I8/77Ff9nytqjjIz0agdA//LLWcrKyqx2X4MH2wdnz83N1U8//VjlsgcO/FHjTaraOnr0qF57baok+7HuhRdekZ+fn2655VZdcMGFkuzH+ffee9cl+2tK3333VbWpQ2bP/swRuBk58qJ67aP8QySuOD7Gxzd3vC6f5tVdFi1a4Kh7rVu30bhx4yVJEyY8p7g4ew+P2bM/r/bC0dXfQcXt2//nEhMTqm0PP/zwfbd/Z7U9Hy0pKdFHH33Q4P2daeebQF3Ut73u0KGjQkLs2VAOHjygJUsWO+b16VPxBqNkv0nUq5f94cSNG9c7PZhT2Tq1/V/Ozs7Wl1/WP61/bVitVs2eXfW1RWFhgb777ivH+/q2V6fbv3+fXnxxsuP9tdde77aeEDW1bWXH+YKCmlOCNYZZsz6vtr354ouZjteV/T3atWvneF3dQ0pHjhyu1c3Rsna2Nm1s2bW4xWLR++/X/1yuWbN4de7cpVblXLNmtQ4fPlTvfQFwPdrg2mmqNtiV6nK/rkuXro6MImvWrNLWrVvcWTQnF198qePB6NmzP1NmZv06LkjS8OEjHa+ruz9bWFh4xmcqOFcQGPRgRqNRzzwzSdddd70ke5rJhx66VydOJLtsH+W
fWJs+/e1Kb25s3LheU6e+1OB9ffDB//T772uqfcp74cJfHEGkjh07O82bNu0NbdtW/ZN/3377teN1x46dGlBa+xMu5VPelaUMqAsvL2/deuufJdlvhj/77NOVpsBLSkrSs88+7bhQuvXW2yuk0Rs58k+Oi4h9+/ZqwoTxys2tepDZ339fo6KiolqVsy51bceObbr++qv02Wef1Phk1P79+zRlyrPlPsOoCgPU/uUv9zjSxPzvf+9o9uzPq60jBQUF+vHH77Vw4S+1+mzljRp1keNJpN9/X6Pnn3+22kGOLRaLfvtttWbMqHgT8o477nIEir/99mt9+OF7Vd4cLC4urvSir/zF/N69e+r0WQB3cGe7c/31NygsLExS2ZP0e53md+vWTZL9WPnuu5WnG1248Be99947Ne7rttvucDwF/vbb/1fpU+NpaWmaOPEpl9zULykp0XPPPe04nvz97+McFwaS9Oyzkx03BT/77JOz/unv5ORkTZkyqdLemIsXL9Ts2fYLTV9fP9100y312ofz8XF3tcu+/PILjvGSq/Ldd+XPDzpXs2TDJSUlOs6bfHx89OKLUx0XliEhIZo8+SWZTCbZbDZNmTKpyp4H5YNP7mgjuna1/89lZmY63Ywt7/PPZ+r7791/0demTVtHuu+VK1dU+j9bWFioyZMn6o8/qu+9eLadbwL1UZ/22mg0OrJnSPb2SJJatmxVbeCqbJ39+/c5PUxT2U3Jrl27OdrfOXO+r/TGZFZWlp58clyNPQlcYfbsL7R48cIK04uLizVlyiQlJ9u/r0GDhlQYa7Su8vLy9MUXn+nee+9yHF+6dTtPDz/8aL2254q2rawtzcrKVHLy8XqVw5V27dqhadNer/T4/MUXn2npUvuN8vDwCF15ZcVelgMGDJLJ5CVJ+vbbr3Ts2NEKy5w8eVJPP/3PWj3QUtbOHjlyuMbxlG688WY1a2Zf/ocfvtN///ufatOZlpSUaPHihfrmmy8rzLv99jsdr19++Xmn4UTKHD16RC+//HyNnwFA46MNrp3GbIPdoS736wwGgx56yN7e22w2PfnkuFpl2Xn//ek1tvU1iYmJ1U03jXFs8+9/f1iJiQnVrrN//z5NnfpihelXXnmVY9zMJUsW6euvZ1dYpqSkRC+99LyOH3ftcGOonFdTFwBNy2Aw6Mknn5GXl5e++eYrJSQkaOzY+/Tf//7P6YZRfV199XX65JOPlJ+fp5UrV+iOO8bo8suvVFxcM+Xk5Gjt2t/066/LZTQaddllVzhSpNXHxo0b9MEH/1N4eIQGDRqsjh07KzIyUkajUampKVq79jenA+ddd93ttP7y5Us1e/bniotrpgEDBqlDh44KDw+XxWJRSspJrVz5q7Zt2yLJ3sX/z3/+S7Xl2bt3T4VxCywWqzIy0rVt2xYtX77UEVhr0aKl/vrXe+r1uceMuV2rVq3U5s0blZSUqD//+WZdeeU16tatuwwGg3bt2qm5c+coPz9Pkr2BLwsmlmc0GvXyy//Svff+RRkZGVq5coWuv/4aXXzxJerSpasCA4OUnZ2tgwcPaM2alUpISNCiRSvk6+tbq3LWpa6lpKTov//9j9555y11736ezjuvh1q1aqWQkFBZLBadOJGszZs3ae3a3xw33WNiYvXII3+vsN+YmBi9+OKr+uc/H1NxcbGmTXtD3377tYYPH6m2bdvJ3z9A+fl5On48Ubt379bGjetVVFSk++9/qK5/ChkMBr3yyuu67767dOJEsn75ZZ7WrFmlUaMuUpcuXRUSEqqioiJH2rT1639XRkaG+vUboLvvvtdpW3FxzfTcc89rwoQnZbGY9f7707VgwXyNGnWRWrduI29vb6Wn21OWrlq1UrGxsRoyZKjTNtq376CIiEilp6fpl1/mKTQ0TOedd778/Pwcy5T1fAIai7vaHT8/f40Zc7umT39bNptN7733rl5/fZpj/o033qKffpoji8Wsb775Unv37tGoUX9SdHSM0tP
T9euvy7VhwzoFBARo2LDhWrZsSZX76tmzl266aYy++mqW8vPz9OCD9+myyy5X79595ePjo3379mrOnB+UnZ2lESNGafnypfX+XJL0zjtvac8ee/BqxIhRuv76G53mh4aGafLkF/Xoo2NltVo1efJEffbZlwoLqzhY/YYN6yqklipLRybZ28LTL/KuueY6pwuXMjk5Ofr880+dppW/MXj8eGKFMR87d+6ikSP/VN3H1ciRf9LSpYu1f/8+XXnl1WrRoqVyc3O0evUqrVy5wrHcww//rdJy1Ub//gP01VezJEkvvTRFt9xyq+LjmztSukRHxzguIOfM+V5z5nyv1q3bqF+//mrXroNCQ0NVXFys5ORkLV26yBFMCgkJ0fXX31Ttvrdu3VLtgzflNW/ewulC1mw269lnn3b0en3kkccqBI569eqtu+++T++/P10ZGemaMuVZTZv2doUx/vr3H+h4/d///kcZGelq1aq1vLy8Sj9LaIPS7txyy+1au9Z+g+Gtt6Zp48YNGjRoiCIiInTiRLKWLFmkXbt2KioqSu3bd3As6w7e3t66/vqb9Nlnn8hsNmvs2Pt05ZVXq1u38+Tv769Dhw5q3ry5OnEiWf36DdDRo0eqHEPjTDvfBNylPu11nz79tGLFMkn2HvpS1SnMypTNt1qtjmEW2rZtp8jIyArLRkdHa9Soi7RkySLl5ubqzjtv1TXXXKcOHTrJZDJp3769mjdvrrKyMnXllVfr559/qu/Hr1GfPv30xx/7NXHiU1qwYL6GDr1AQUHBSkg4pp9//skRVAoJCdWTT06ocXtHjhx2fHeS/Xifm5ur1NQU7dq1U5s3b3Jcz0lSv379NWXKKxUeiqwtV7Rt/fsPdLTLTz75uEaPvlExMTGO9qZFi5Zq2bJVvcpXV9HR0YqNjdOXX87S5s2bdemllys2NlYZGelaunSJNm+294QxGAx6+umJlWbqiYyM0hVXXKmffvpRubm5uueeOzV69I3q0KGjSkpKtGvXTs2bN1dFRYW66KJLKr0hXV7//gP1xx/7VVBQoCeeeExXXHGVwsLCVdYct2/fUTEx9rGb/Pz89dprb2rs2HuVk5Ojzz77RAsWzNPIkRepY8eOCgwMUmFhoU6cSNbevXu0fv065eXl6uqrr6uw30suuUyLFi3QypUrlJqaqjvvvFVXXXWto03fuXO75s6do8LCQg0fPtKp3gE4M9AGV8/VbXBTqOv9uqFDh+n++8fqvffeVWZmpv72t7Hq1au3Bg0aombN4uXl5aWcnBwdPXpE27dv1Y4d22Wz2Zyu+epr7NhH9Mcf+7R27e/au3ePbrnleg0bNly9evVWZGSUbDabMjMzdPDgAW3cuEFHjx6RyWTSU09NdNpOUFCw/vnPCXrmmfGy2Wx6441/acWK5Ro58k8KDQ1VUlKSfv55jo4cOeyS+yioGYFByGAw6IknnpKXl5dmz/5CSUmJeuihe/X22++pefMWNW+gGhEREXrxxamaMOGfKiws1MGDB/T22//ntIyfn5+efHKCLBZrgwKDZRcgGRnpmj//Z82f/3Oly/n7++vxx8dryJALKl0/Ofm45sz5vsr9hIaGadKkF2p8gvvrr2dX+vTD6fr06afJk19UUFBwjctWxmQy6d///j9NnjxRK1YsU35+fpX7HT58pJ5//sUqx/pp3ryFPvzwUz3zzJPavXuXsrOznJ5aP91p9xhrVJu6Fh4eoejoaKWkpMhqtWr79m3avn1btdvt16+/Jk6crKioyp+EGjhwkP73vxmaPHmijhw5rGPHjjqeoKqMyWSq9ESoNmJiYvTRR5/phRcm6bffVis7O1s//PBdDevEVjp9+PCR+ve//09TpjyntLRUHT16RB9//GGly8bFNaswzcvLSw8++LBefnmKzGZzhRv4kvT775tq8akA13JXu3PTTbfo888/VU5Ojlat+lW7d+9y9Frq0KGjnnpqgqZOfVkWi1nbt2+t0GsoNDRMU6a
8pG3btlYbGJSkxx57XAUFBfrppx9ksZj1888/Vbj4ueWWW3XhhSMbdEK7Zs1qzZ79uST7eHoTJjxX6XJ9+/bXnXf+VR9//KFSU1P1wguT9cYb/6mw3ObNm6o8jkjSqlW/atWqX52m9e8/sMrAYHXbSk5OrjD/iiuurjEwOHHiJOXkZGvDhvUVAouS/UGW++57UDffPKba7VRnyJAL1Lt3X23evFEJCcf0xhv/qlDO555zfpL+yJHDjjH9KhMXF6dXXnndcZOvKrXplVrmlltu1T/+cWqMp//97x1H2vALLriwyu/gr3+9Vxs2rCt9iOZ3ff75pxUCTB06dHQ8lJWenqa33prmNL9377569933a13W0w0ZMlT33vuAPvjgf5LsaW/WrFnltExcXDNNnfq6vvqq5vOlhnrggYf0xx/79fvva2Q2m/Xjj9/rxx+dz/d69+6rl16aqr/85fYqt3OmnW8C7lTX9rqyMYnKj01TmY4dOykoKEi5uafSfFe2nTJPPvmMEhKOae/ePcrPz3f0Ii9v1KiLNH78BLfelIyLa6Z7731ATz/9T61cucLpwZUyUVFRev31/zh6glVn8eKFNQaaJHv66FtvvV3XXnt9hQc+6qMhbdvVV1+rb7/9SkeOHNbevXsq9A645577dd99Dza4jLVhMnnp5Zdf0+OP/0379u1xeuipjI+Pj8aPn6ALLxxR5Xb+/vdxOnjwgHbu3KHs7Gx98skMp/m+vn6aOHGSLBZrjX+v2267QwsWzFd6elrpg1nOGR0mTpzsSBco2dvljz76TJMmPaOdO3coJSXF8RBTZQwGg2JiKr/+feGFVzRhwnitWbNKhYWF+uabL516F5pMJj366D8UGhpKYBA4Q9EGV83VbXBTqM/9urvvvk9xcc00bdrrys7O1pYtm7VlS9XjkQcEBCooqG5DVlVV1jfe+I/eeee/+vLLWTKbzVq2bEm190yioyu/Jh416k968sln9MYbr6qkpKTS9rF3776aOHESgcFGQGAQDo899oRMJi99/vmnSk5OdjyN0qpVw57yGzJkqGbOnK3PP/9U69atVUrKSfn6+io6OkaDBg3R6NE3qlWrVpo7d06D9vP669O0fv1abd68UXv37lFCwjFlZmbKZrMpODhYrVu3Uf/+A3XNNaMr7Ur/8cefa+3a37Rly2bt27dHiYmJysnJlsFgUEhIqNq2bafBg4fq6quvVUhISCUlqJnRaFRgYKBiY+PUtWt3XXzxperff0CDL+r8/f316qtvaMOGdZo3b662bt2itLRUSVJERKR69uylK664qlZPisTHN9eMGTP166/LtWTJYu3YsU3p6Wkym80KDg5R69at1bt3X11yyWX1DmZWV9e6dOmqOXN+0a5dO7Vp0wbt2LFdR48eUUrKSRUUFMjLy0uBgUFq2bKlunbtppEjL1LPnr1q3GfXrt00a9Y3Wr58qX79dbl27tyh9PR0FRYWyN/fX7GxcWrfvoP69OmrYcOGKzIyql6fTbIHxN988y3t2LFNv/wyX9u2bdGJEyeUm5sjHx8fRUZGqnXrturZs7cuuGCYY7D5ygwcOEjffjtHc+fO0apVv+rAgf3KzMyUwWBQRESE2rfvoAEDBumSSy6vdP1rrrlOcXHN9P3332j37p3KyMiodQpYwN1c3e4EBgbpppvGaMYMezDjvffe1ZtvvuWYf/XV16ljx86aNeszbd68SenpaQoICFRcXJwuuOBCjR59o6Kjo2tM8yeVpXh5TqNG/UnfffeNduzYrtzcHIWHR6hr124aPfoGDRo0xDG2a32kpaXqhReek81mk8lk0uTJL1Xb/tx77wPauHGDtm/fqtWrV+rLL7/QLbfcVu/9N5XAwCD93/+9q59/nqP58+fp0KGDys3NUUREpPr06atbbrlVXbp0a9A+TCaT/vOft/X1119qxYplOnz4kPLy8ipNDfbTTwu0du0abd26RX/8sV9JSUnKzc2VyWRUWFi4OnT
oqAsvHK7LLrvS6QlPV1u3bq3joZbo6Gg9++zkKpc1Go16/vmX9Oc/j1F2dpamT39bffv2dwTKyzz33BT17t1Xixb9ogMHDignJ1tms+vG+7v33gfUu3cfffXVbG3fvk3Z2VkKDg5WfHxzDR8+SqNH31Ahu4K7eHt769///j/9/PMczZs3V/v371NRUZHCwsLVvn0HXXLJZbrssiscvUarcjacbwKuVtv2un37DgoLC3P0OpCqHtuojNFoVM+evbV69cparRMSEqL33vtI3333tRYtWqjDhw/JbC5RRESEunTppssvv9JpDBt36tOnr2bOtD8MumrVr440b/HxzTVixCiNGXNbva+X/Pz8FBgYpODgYLVt206dO3dR3779dP75PV1Sdle0bf7+/vrww080a9ZnWrNmtY4dO6r8/PxqUy27U0xMjD744BP98MO3WrRogY4dO6aCgnxFR8dowIBBuvXWP9d4jhkUFKx33/1A33//jRYu/EWHDx+W2Vyi6OhoDRw4WDfffKtat25Tq3sX0dHR+uSTL/TFFzO1fv1aJSUlqaAgv9pxEFu0aKkPP/xUa9f+piVLFmv79q1KTU1Rfn6+fH39FB0drbZt26l3774aNuzCKjMn+Pn56d//3959hzV1vXEA/4YoW0AUBOpWFHEyHHVrte7WPau1dWuHVStqqVp3f9pqax1Va+uo4qhVUdwbrYOh4gJEQRBBhhACBITk90eaK5GQMAJY8/08T59m3Nx7EmLee897znt+/BknTx6Hr+8hhIWFIjMzA7a2VdCihRsGDx6GJk2alrgPhohKH2OwZqUZg8tKcfrrevfui06dusDPzxdXr/6Dhw+VfYS5uTmwtLSEk1N1NGzogpYtW6Ft23bFrizwugoVKuKLL77CkCHDcOTIIQQGBiA6OhoSSSqMjIxgZWWNmjVronHjpmjd+l210rav699/IJo3b4Hdu//E9etXkZycBAsLC9SqVRs9e/ZGv379C5zQQvolUmg7Kymi8PBwZGe/hEhkBEfHGvraLRERERERERFRuYmNjcXAgcq16TTNLCciIqLSwRhMhu7Zs2goFHIYG1eEs7N+1s3UPiSWiIiIiIiIiIiIiIiIiN4KTAwSERERERERERERERERGQAmBomIiIiIiIiIiIiIiIgMABODRERERERERERERERERAaAiUEiIiIiIiIiIiIiIiIiAyBSKBQKfe0sPDwc2dkvIRIZwdGxhr52S0RERERERERERERERGRQnj2LhkIhh7FxRTg7O+tln5wxSERERERERERERERERGQAmBgkIiIiIiIiIiIiIiIiMgBMDBIREREREREREREREREZACYGiYiIiIiIiIiIiIiIiAwAE4NEREREREREREREREREBoCJQSIiIiIiIiIiIiIiIiIDwMQgERERERERERERERERkQFgYpCIiIiIiIiIiIiIiIjIADAxSERERERERERERERERGQAmBgkIiIiIiIiIiIiIiIiMgBMDBIREREREREREREREREZACYGiYiIiIiIiIiIiIiIiAwAE4NEREREREREREREREREBoCJQSIiIiIiIiIiIiIiIiIDwMQgERERERERERERERERkQFgYtAABAYGoE0bd7Rp447NmzeWd3PKVWE+i/79+6BNG3f079+njFv35pgyZYLwOZUUv39Ehof/7l95m+OO6n1NmTKhxPvavHmjsL/AwAA9tI6IiHRhvH57vQ1/27f5HOpNceTIYeEzPnLkcKkfT5/9DET/dW/D7zRRSemKC2UdpwxNhfJuAP03HDlyGEuWLNT4nJGREczNzVG1qh0aNHBBly5d0aFDJ1SowK+XLjJZJsLDw3D//n08eHAfDx7cQ1RUJHJzcwEA3t4L0bfvB4Xal0KhQHT0E9y/f+/ffd1HaOgDZGSkAwDc3DywYcPmUnsvJXHkyGE8exYLAJgwYXI5t4aI3gSMO6Xrzp3bOHrUFyEhtxEX9wyZmZkwNTV
DtWrV0LhxE/To0Quenq3Ku5lqVBfMjo5OhY6NRERUOm7eDMbkyeMAAC4ujfDHH39q3f7583h88EEv4f7YseMwefI0ra85dOhvLF++GADQvXsPLF68vIStfrNs3rwRv/22SeNz5ubmsLCwgKVlJdSpUxcNG7rAzc0DzZo1L+NWFszH50+kpaWhUqVKGD58VHk3543ztp23PHnyBH5+vrh1KxiRkZGQStMAAJaWlnBwcETt2nXg4uIKT8+WqFevfjm3lujtxhhccnlj8LhxE9kXSUhLS4OPj/LfUoMGDdGpU5dyblHpYw8alZhcLodUKoVUKkVk5GOcPHkMDRq4YNmy71G9eo3ybt4b7YMPekMiSdXLvn7+eTV2796pl32VtaNHfREcHAiAiUEi0o1xp/iysrKwfPliHD/ul++59HQpHj2S4tGjCPj6HkLbtu2xaNFSWFpWKoeW5qe6cHNz83grOtiIiP7LGjduAlNTU8hkMoSHh0EqTdMaLwIDb6jdDwoK1HmMvDPIPTw8i9/Y/6CMjAxkZGQgISEBjx8/wtmzpwEAtWrVxvDhI9G//yCIRKJybaOPzy7ExT2Dg4MjE4MavC3nLTk5OVi37mfs3btbGMCc14sXL/DixQvcv38Px44dBQB8/fVcDBo0pKybSmQwGIOJ9C8tLU2I3b1792NikEgTDw9PDB06Qrgvl8uRmpqKkJBbOH36JLKyshAW9gBffDEVO3fugbm5eTm2tugOHjxaZseSy9VPrB0cHPDyZQ6SkhJLvC9zcwtUq1YNjx8/KlEbS8rDwxNXrwaVaxuI6L+NcUd/Fi70xrlzZwAAYrEYnTt3RbNmzWFrWwXJyUm4ffsWzp07A7lcjitX/DFr1nSsX78ZRkalW31+woTJHBhCRPQfUrFiRTRr1gLXr19Fbm4ugoOD0KFDpwK3f70T8t69O5DJlLPVC6IaOAgA7u4tS97oN1i3bu+je/cewv2XL19CKk1DYmIi7t27i5s3g5GRkY6oqEh8//0ynD59EosWLUeVKlU07u9tuAZ7G97D22DRovk4efI4AEAkEqFly9bw9GyJatUcUKFCBUgkqXj06BFu376J0NAHAPL3TRCRfjEGk6Ho2/eD//TgmjcdE4NUZNWqOWrMmvfvPxAfffQxpk6dgBcvXiA29in27fPBxx9/Wg6t/G/o2LEzatasBReXRnBxaQQbm8pYtGgB/Px8i7yvOnXqYvjwUWjUyBUuLo1Qs2YtBAUFYtq0iaXQciKissO4ox83bwYLSUFzcwusX78JLi6N1LYZNmwk7t+/h2nTJiIjIwM3bwbD3/8SOnYs+EKTiIgMk7u7B65fvwpA2elYmE7Jtm3b48oVf+Tk5ODWrVto3bqNxu2fPHmChITnAAA7O3vUrFlTz61/s9SqVVvryPT0dCkOHjyATZs2IitLhsDAAMya9SU2bNistWOXqCQuXbogJAWtrKywcuUaNG/eosDtnz+Px5EjvqhSxa6MWkhkuBiDiaikSnf4NxmcOnXqYuzY8cL9ixfPl19j/gPmz1+EsWPHoU2btrCxqVyiffXvPwjTp89Ejx69UKtW7XIvLUNEVBYYdwrv6tUrwu0BAwblSwqqNGrkigEDBgn3b97kaH0iIsovb2mxvCXHXhcfH4enT2MAAL1794WDgyMAICjoRoGvCQpiCbO8LCwsMWrUGGzZ8gesrKwAAPfv38PatWvKt2H0Vstben7y5M+0JgUBwN6+Gj79dDy6dn2vlFtGRIzBRFRSnDFIguTkZMyY8TkePLgPAPjwwwGYPXsexGJxkfbj7u4h3I6OflLgdgqFArdv38K1a/8gJOQ2IiMfIzU1BSKRCFZW1mjY0AWdOnVGz559ULFixUId++rVK/j7779w504I0tIkqFzZFi4ujTBw4GC0bv1uofbRv38fYa2E18u7/fnnDqxduxoA4Obmjl9++bXAz+fJkyiMHTsKGRkZMDU1xe+/70SdOnUL1YY3jVwuh5+
fL44dO4rHjx9DKk2Dra0t3N1bYujQYXBxcS3wtYGBAcKsxdcX9J0yZYJaeQIAaNPGPd8+evfuh/nzv1N7LDw8DIcOHcDNm8F49uwZZDIZLC0tYWNjAzs7OzRu3ARdunQrsOObiMof445SWcWdFy+Shds1amgf9Zn3+czMjEK9D4lEgr/+2otz584iLi4W2dnZcHBwRLt27TFy5GhUqVK1wNfmXfx93bpNahegr8eF4OBAjbHC23thvjIjly9fwvHjfrh//y4SExORm5sLKytr2NjYwMnpHbRo4Ybu3XvA3r5aod4jEZEhKiheN2rUGObm5sjIyMDDh2FIS0tDpUr51zjK28Ho7u4Jd3cP+Pkd0brGUd7nXu+UTEl5AX//iwgKCkR4eBiePYtFZqYMFhbmqFbNAW5uHhgwYJDOa6+81yJXrwZBLpfj+HE/nDx5DBEREUhOTkZubo5QzjI2NhYDB/YF8Or65Pnz59i/fw/8/S8iLi4OAFC9enV06tQFw4ePhIWFpdY2FJWzcwN4ey/E7NkzAACHDx/EmDGfoFo1B7XttF2D5VWcayrVeYtKXNwzjXH59eOqtnFz88CGDZuRlpaGv//+CxcunEVs7FOkpKSgRQt3bNiwuUjvIa+srCwcPPgXTp06gZiYaGRmZsLOzg6tWr2L4cNHaZ31krdyz4EDR+Dk5FTgtkeOHMaSJQsB5D//KMl5CwDk5ubi1KkTuHjxPO7du4uUlBcQiUSws7NHixbuGDBgEBo1Kvj6O69Tp07gyJFDCAsLRXp6OqpUqYoWLdwwaNBQNGnSVOfrIyMfC7fd3PK/B304f/4sDh8+iPDwUKSkpMDa2hpNmjTDsGEj4ObmUeDrNP0NwsJCsW+fDwIDA5CUlAgrK2u4ujbGmDGfoHHjJmqvv3LlMv7+ez/Cw0ORlJQEG5vKaNWqNcaNmwgnp3dK5b0SFQdjcPnF4NfbqFAocOzYUfj5+eLRo0dIT0+Ho6MjOnXqjJEjR8Pa2kZ4bXq6FIcOHcSpU8fx9OlTZGdnoUaNmujZszeGDh1RYH+DpveZlJSIvXt9cOnSBcTFxaFixYqoXbs2hgwZjvfe6642MSQi4iH27NmNmzeDEB8fDxMTEzRp0hSjRo0pdJI3KSkRBw8ewLVrVxETEw2JJBUWFpaoWbMm3n23PQYPHioMUtJGIpFg9+6duHjxPGJjn0IsFsPR0QmdO3fF4MFD1T6vgmiLtyol7efJ+5mr+Pn5aqzm93pfhUpWVhb8/Hzh738R4eHhSEl5gYoVjVGtWjV4eLTEkCHDULNmLZ3vt6wxMUgAgJiYaEyfPg0xMcpRJOPGTcCECVOKtS9jY2PhdlZWVoHbLV36HY4cOazxuYSE50hIeA5//4vYuXM7Vq5co/UkXi6XY8WKpTh8+G+1x+Pj4xAfH4cLF85h2LAR6NixZAuHjhz5EW7cuIarV68gODgIv/++BePHT8q33cuXL+HtPRcZGcrO1BkzZv9nk4Lp6VLMnj0j3wikuLg4+Pn54sQJP0yYMBljx44rszZt3boZW7b8CrlcrvZ4amoKUlNTEBUViYCAG7h06SJ27dpXZu0iosJj3CkcfcadypVthdvR0VFaj/vkyasEa5069XS2MyLiIWbO/FKtoxBQdihFRj7G4cMHsXjxcrRp01bnvvRBJpPB23sO/P0v5nsuKSkRSUmJiIh4iEuXLuDZs1jMmjWnTNpFRPRfoyteN2vWAlevXoFcLkdwcCA6duycbx+qDsbatevA1tYWbm7u8PM7gnv37iEzMxNmZvlLYeYdPOjh8Wpto6dPYzB06EDk5ubke41EIoFEIkF4eBj27fPBhAmT8emnEwr1PiUSCby8ZiA4uPCz5AMDAzBv3mykpqaoPR4WFoqwsFAcPPgXVq36CQ0buhR6n4XRsWNnuLg0woMH9/Hy5UscO3a0WNdi5XlNFRr6AF5eM4SOXH14/jweM2d+ifDwMLXHY2JiEBOzD0ePHsbs2fP
Qp08/vR1T3yIiHmLevNmIiorM99yTJ1F48iQKhw//jcGDh+Grr2YVOFhMJpPhm29m4/Jlf7XHnz2LxbNnsTh58jimTv1cZ6dsbu6rtQKTk5P12qeRlZWF7777FmfPnlZ7PDExEefPn8X582fx2Wdf4qOPPi7U/vbv34s1a1YhJ+fVb0NCwnNcuPAcly5dwLffLkSvXn2Rk/MS33+/HL6+B9Ven5DwHEeP+uL8+XNYu3YDXF0bl/g9EpUUY3DByjoGZ2RkYO7cWbh27ara46rr3dOnT2H9+k2oVs0BT55EYebML/MNWg4PD0N4eBguX/bH6tVrYWJiovO4t27dxJw5s9QG+aoev3XrJgICrsPL6xuIRCIcPPgXVq78Xu3vk5Ulw5Ur/rhyxR+zZ8/DwIGDtR5vz57d2LBhLWQymdrjqakpCAlJQUjIbfj47MSiRcu0Xtvfu3cXs2ZNR3JyksbP4PDhg1i1ao3O918Y+uznKY6goEAsWPCNUH5XJTs7G48eSfHoUQQOHNiHiROnvHHL3pRrYlAhl0ORJS3PJpQLkYklREZvThXX+/fvYcaML/DiRTLEYjFmzfLCgAHafyi0efQoQrj9+sjFvGQyGSpWrIjmzVugceMmqF69BiwsLJGdnY2YmGicP38WDx+GIyoqEjNmfIZt23YVONpjzZofhM5ZsViMHj16wd3dAxUrGiMsLBS+voewZ89uxMfHF/t9AcrFtufPX4TRo4cjKSkRv/++BZ6eLdGihfroubVr1yAsTLnwdrdu7+ODD/qX6LjlafHihQgMDED16jXQu3c/1KhRA1JpGvz9L+Hy5UvIzc3Fxo3rYG5ugaFDhxdp35MmTUVqagp+/XW98L35/vsf8m2X93t08eIFbNq0AQBgYmKC9u07onlzN1SuXBlyuRyJiYkIC3uA69evleBd09tMIZcjV2p4sUds+ebEHsadwtNn3OnUqQv++OM3AMDffx9A9+49Nc6qvn//Hg4e/AsAYGtbBb169dHaRtUAkri4Z3Bzc0eXLt1ga2uL+Pg4nDhxDGFhoUhLS4OX10xs2LClyB0tqrjg5TUTAFC3bj1MmjQ133Z5L/o2blwnJAUrV66M9957H3Xr1oO1tTWys7MQGxuLe/fuaC27Q0TlSy6XI1uWXt7NKHPGphYw+g/Fa3d3T6FUdVBQgNZOSdXsH9X/c3NzcOtWcL6OpbxrGzk4OKrN3nn58iVyc3Pg4OAAT89WqFevPmxtq6BixYp48eIF7t4NwZkzp5GVJcOmTRtgZWWNwYOH6nyvCxZ8g+DgINStWw/du/dA9eo1kJ6enq+6iUp8/DPMnfs1JJJUtGvXAe3bd4ClZSVER0fDz88XMTHRSEhIwOefT8G2bX/C0bHg2WfF0aNHL2H2SFBQYJETgyW5ppo71xsymQwrVizBixcvULlyZcyZ451vu1q1ams8tkSSitmzZyA+Pg6tWrVB+/YdYWtri6SkpHwdn4WVm5uDefNmIzw8DA0aNESPHr1QrZoDkpOTce7caQQHByErKwtLl36HSpUqafye6ktxzlsAZbJ0ypQJyMhQ/u61aOGGtm07wNHREXK5HA8fhuPoUV8kJydh//49yMl5qfFzB4D58+cJSUETE1P06/eBMGPuzp0QHDnii7Vr12hd1xIAqlevIcwa3Lt3N9zdPfS2ZMnSpYtw9uxp1KtXH92798A771SHTJYJf/9LuHDhHABg3bqf0aRJM7Ro4aZ1X1eu+OPcuTOwtrZGv379Ub++M3JycnDlij/OnDkFuVyOJUsWoUmTZti71we+vgdRt2499OzZG46OTpBIJPDz88Xdu3eQni7F/PnzsHv3/kJXEHkbyeVyrQMu31YmJiaMwYzBGi1d+h2uXbuKxo2boFu392FnZ4/ExAQcPHgAkZGP8fRpDBYu/Bb/+9+P+PzzKXj+PB5du3ZDq1ZtYGlpicePI7Bv3x5IJBIEBQVg27atmDhR+8Do+Phn8PKaAalUij5
9+sHNzQMmJia4d+8uDhzYj6wsGQ4ePIAmTZrBwsICK1YshY2NDfr2/RDOzg2Qm5uLy5cv4cyZUwCAH3/8H9zdPVC7dh2Nx9u4cZ3QX2BmZoYuXbqhadOmsLa2gUSSihs3ruP8+bOQSCSYOXM61q3bmK9fAgBiY5/iyy+nIi0tDYDyfKBPn35wdHRCamoKzp8/i4CAG/DymglLy5JXVihpP4+tbWV8//0PePEiGStWLAWgnCk7dOiIfMeqV0990PSVK5cxe/ZXyMnJgZGREdq0aYuWLVvBzs4e2dnZuH//Ho4dOwKpVIoNG34BgDcqOVhuicHsh9eRcWk7FJmS8mpCuRGZWcG8wxgY129V3k3B1atXMG/ebGRkZMDExASLFy8v0UlydnY2duzYJtzXVoN+0KAhmD17XoHTj8eNm4gdO/7A+vVrERMTgz17fPDpp+PzbXfr1k3s2+cDQPnDtXr1L2onjj169MKIEaMwbdoknD9/tpjv7BVbW1ssWLAYX345Fbm5uViwwBs7dvgI7+PSpQvYu3c3AMDJ6R3MmfNNiY9Zns6fP4tOnbpg0aJlaqNZBgwYjJMnj+O7775Fbm4u1q37Ge3bdyhS2Q3V38nHZ5fwmK6Lk0OHlJ3VYnEF/Prr1gJLhebm5uLOnZBCt4UMg+T6dTzbuR25EsOLPWIrKzh+NAZWrco39jDuFJ2+4k6jRq4YPnwUfHz+REZGOsaNG4MuXd5D06bNYWtri+TkZNy+fRPnzp2BXC5H9erVsXz5Ko0lafIKCwsFAEyb9gVGjx6r9tzw4aOwevUq7N+/B1lZWVi8eAH+/HNvkS64X48L1tY2WmNFbm4ujhw5BEBZSmbr1p0F/s3T06XCCFwienNEPQhCwOl9kGWklXdTypypeSV4dhuCWi6lU7avsAobr3WtcZR3bSNV6e/q1WvAzs4eCQnPERgYkK9TUtvaRpUr22Ljxt8KTBQMGjQE48dPxvTp0/DkSRQ2bvwFvXv3hbm5udb3+88/lzF48FB89dXXarOw+vcfqHH7wMAAiMViLFq0DO+/31PtuVGjRmP+/Hm4cOEcJJJUfP/9UqxZs07r8YuqadNmwu379+8W+fUluaZSlUpfvXoVAGXiSdc1XF4REQ8hFouxcOES9OzZu8ht1yQhIQEJCQkaZ9INHTocO3b8gXXrfoZcLsfy5Uvg4eGp9zKvKkU9bwEAmSzz339v6TA1NcXixcvRoUMntW169OiFjz/+BF5eMxEYGICDBw+ga9fuaNWqtdp2J08eF9bcrlKlKtat+1WtE7hXr74YPHgYpk2bJCTgCtKjRy9hoNWFC+cwceInGDhwMFq2bI2qVe20vlaXkyePYcSIj/D559PVzgv79euPrVu3YNOm9VAoFNi5c5vOxODZs6fRsKELfvppHWxsKguP9+nTD3Xr1sPmzRuRm5sDb+85CAsL1bh8wYcfDsD06dMQEHADMTHRuHjxPN57r3uJ3uN/1ePHEfjnH3/IZJnl3ZQyZ2pqhnffbV+oiimliTFY6U2KwWfOnNJY1vrDDwdi/PiPERHxEMHBgfj888lISXmBNWt+ybe0SLduPTB27ChkZWVh//49+OST8VoHIAQGBsDKyhpbtvyhtoRT9+490L59R3z22SQoFAr89tsmZGRkwNW1MVav/gXW1tbCtr1790Xt2rXx22+bkZOTg337fPD113PzHeuffy5j27atAIAmTZpi2bKVsLe3V9umf/9BuHXrJr766nNkZKTju+/mY9++g6hQQT299P33S4Wk4HvvdcfChUvU3ufgwcOwe/dO/PTTjwW+96IoaT+PqakZOnXqgtjYWOGxatUcdcbuxMQELFzojZycHFSubIuVK39EkybN1Lbp3bsvRo8ei6+++gwREQ+xadMGdOrUpcDkbFkrt2EQGee3GmRSEAAUmRJknN9a3s3AsWNHMGvWdGRkZMDKygo//bS+WJ2zcrkcKSkvcPHiBUyePB737t0BAFSoUAG
jRo0p8HVubh5aaxKLRCKMGfOJ0MmrqbYvAOzatQMKhQKAslNSU4CqWtUOS5asKPK6VQVp1aq10PkZHx+HpUuV6989f/4cS5Yob4vFFbBo0TJYWmrvUH3TVavmgIULl2ic4v7++z0xZIhylmBWlgx79/qUentUHbgNGzbUun6gWCzWuTg6GZ7Y37caZFIQAHIlEsT+Xr6xh3Gn+PQVd6ZPn4lZs7xga1sFubm5OH36JFavXolvv52L1atX4syZU7C2tsGcOd9g+3YfODs3KFT7Onfumi8pqGyTGDNmfC2shfP48SP4+18qwjsvupSUF5D+Oyu4U6euWv/mFhaWei/xRkQld+3EboNMCgKALCMN107sLtc2FCVeu7g0EhIsDx+GIzU1Ve159bWNXq0XplqvLO/zr16juYQZAFhbW+tMEjg5OeHrr5UloqVSqZAk0aZhQxfMmDG7SHF76NAR+TokAeWMk4ULlwhVDK5e/SdfecuScnBwFG6npaUhJ+dlkV5f3tdUgwcP1VtSUKVRI1fMmPG1xr/h6NFjhQ6+Fy+SceSI5nO88nLo0EGh897L65t8SUEVS8tKWLbsf8K/uV27duTbZufO7cLtefO+1dj5WKdOXXzzzXyd7erW7X106fKecD8k5Da++24++vbtgX79emDWrOnYunULgoIC1cqOFoabmwe++OIrjYPFPv74E9jZKTukb9y4plYeVJOKFSti+fKVaklBlY8++hjm5hYAlLMy69ath6+/npvve1KhQgWMH/+qw181C8sQXb58wSCTgoAySX/58oVybQNjcOGUdQxu1aqNxrVuzczM1K6DHzy4j3HjJuVLCgLK39733+8FQFk+9e5d3ZMZZs6crZYUVPHw8ISnp3LQt3KdxwwsXfq9WlJQZfToT4Tk7NWr/2g8zsaN66BQKFC5cmX88MPP+ZKCKs2bt8AXX3wlHPfcuTNqz4eHhwnlVh0cHPDtt99pTH6OGPERunbtVtDbLhJ99fMU1c6d2yGRKP/NLV/+v3xJQRV7e3ssXfo9xGIxcnNzsWdP+Z7n5/VmzI+mMrd9+x9YtGgBcnJyYG9fDb/+ulXnj7uKn58v2rRxF/5r29YTPXu+h9mzvxI6Z42NjbFs2Uq9ZMCbN1e2KyYmOl/t6OzsbFy5oiyRYWlpiQ8+GFDgfpydG6BVqzYlbo/KxIlThAW7L1w4h717fbBwobfQxkmTphRqQe833aBBQzXWHFcZOXK0cDL/ekAoDaampgCUF7OqEShE9OZj3Ck5fcWdfv364/PPp6Ny5fydJ4Cyw2zHjm3w8ztS6LZpW//FyMgII0eOFu6fO3e6wG31QRUnAGUHEBERFV5R47VYLEaLFi0AAAqFIl/ZL9UMhlq1aqNKlarC46pOyQcPHiA9Xb1kbHCw5o7MolDFcgCFqiIyePDQIs1mfz22vc7MzAyDBg0R7uv7OqlSJfUOsNTUog18K+9rKk3luUoq73WpJqNHvzpXKe1zkaJSdVLa2dmjR49eWre1trZBu3btASjXAcvOzhaei42NFUrL16pVG+3adShwP+3addB53iwSibBkyQpMnfp5vk7XhIQE+PtfxKZN6zF16gT069cTW7b8Kqx3rcuIEaMKLEsqFouFmUpZWVlC0lTbeymocpGJiYla8nvAgEH5ZreoNGnSVHju8eNHOt8Dkb4xBhdOecTgIUOGFfhc3gE0YrEYAwcOKnDbvH9PXb8zlSvbolu39wt13PbtOxZYMtXU1FRILsbGPs1XJvjhw3DhuvmDD/prTC7m1aNHL4jFyt/Ka9fUE415KyYNHDhE7dr8ddoGdZcGbf08RaVQKHDs2FEAyioOmkqq5lW7dh1hSZXXP7PyVG6lRM07f2rwpUTLg0KhwI8/rhRKjtWtWw9r1vwCe/tqejtGx46dMWuWV6H2mZOTg/Pnz+LixfMICwtFYmICMjIy8i2ArvL8+XO1BbLDw8Pw8qVydGSzZi1gbGys9Xienq3wzz+
XC/9mtKhQoQIWLVqOMWOGQyqV4scf/yc817Jla40zJ/6LWukoO2hvb4/atevg0aMIxMfHISkpUe2EQ99at34XoaEPIJGkYvLkcfjoo4/Rvn1HnaXuiADA6ZNPDb6UaFlj3Hmz4k5o6AN4ec1EXNwz1KlTFzNmzIa7uyesra2Qmqpc72Dr1s14/PgRVq1agbCwUMyd6611PRkLC0th3ZqCtGz5Kpbcu1f0kmdFYWFhiSZNmuLOnRAEBFzHrFnTMWTIMLi7exr0WjFE/yWte4ww+FKiZa0k8drd3VNYzywoKBCdO3cVnlPNPHi9c1F9jaObaNu2HQDgyZMoJCQkAADeeae62sy4vKKjn+DYsaMIDg7CkydRkEqlyMqSadxWtVaSNs2bF610a506dWFnp72UYsuWrQGsBQBhIJO+qCoXqBR13bfyvKays7PDO+9U1/t+Xy+p+brGjZvC3NwCGRnpePDgPuRy+Ruxllh6ulSYzVK1alVcuqR7tpIqGZiVlYXY2KdCgi/v98zTs6XG1+bl6dlKWEOwIGKxGGPGfIKhQ4fj0qWLuHbtH4SE3EZ09BO18+fk5CRs2fIrTp06gR9//Fnn37igmRUqeX970tK0X7vpGhhXpUoV4bara8HnrBUqVIC1tQ2SkhINehByu3adDL6UaFljDH7zY7C236y8faA1a9bSWsEn7++Rrt+ZRo1ctc6iVP9ta6x1X6ptFQoFpNI0tapwN28GCbdzc+U6y0wDgLm5GdLS0vIlN/Ne66tmNBbE1bWxEJdLqqT9PEX1+PEjIblYqZJVoT4z1TmHKjmrqTJfWSu3xKBx/VaoWNcTiixpeTWh3IhMLCEqpxPQvXt3C+W1mjVrgVWr1midbqvJ6wtwSiSpiIyMxNGjh5GSkoLLly+hefMWOjP/UVGRmDNnVpFGYqWnq39fEhMThNvVq9fQ+foaNXRvUxROTk6YM8cb3t5zhMcqV7bFwoWL9bYod3mrUaNmobZ59CgCgHLkYGkmBseMGYvLly8hIuIhIiIe4rvvvoWRkRHq13dGkybN4ObmjrZt25XamhH032bVqhUqeXoiV2p4sUdsWT6xh3HnzYk7EREPMWnSp5DJZHB1bYL163+FqemrGeFVqlRB9+490KFDR0ydOhH37t3F4cN/w9XVFf37Fzzq8Z13qus8to1NZVSqVAlpaWnCxWZpmjVrDj77bBKkUin8/S/C3/8iTExM4erqiqZNm8PDoyU8PDwLHDFOROWrlos7ajRogWxZyTsK/muMTS3KJVlRknitvsbRDeF2XNwzxMY+BaDsuMyrdu06sLWtguTkJAQG3hA6JdXLnqm/RmXz5o3444+tyM3VXl5Q5fVYrklBJbMKUtRzAH3Hvtc7E4t6blWe11R2dvobHKZiZWWls4NPJBKhevXqCAsLhUwmQ1pams5ZEWUhPj5e6Li8f/8evLxmFun1kjwDHkvzPNXU1Azdu/dA9+49AAAZGRkIDb2P4OAgnDx5XEgwRkVFYtas6dixw0freZaNjY3W4+UdzJWVla1lS+j8O1as+Gogn+5tK/57zCyt273N6tSph1q16hjkZ2BiYsIY/C/GYHXafjvyDhYuyu+Rrn9j+v1ty3tc9d/UZ89era23c+c2rft5neS1QfdFiUN543JJ6KOfp6jyfmZXrvgLVaUKSyKR6Exul4Vy7Q0RGRlBZFa0E1gqmbx132UyGRQKzZlzbQpagPPTTydg5swvcPNmMNauXQMbm8ro06efxn1IpWn47LNJwo+znZ0d2rZtLwQnY2NjIRifOnUCp0+f/Lf96u3NW6ZC2/TkV9sUXBKzuKpXrw6xuIIQFFu0cCvVxFhZK8xnlvezL2zpkOKytKyELVu2YefObTh8+G8kJCRALpcjLCwUYWGhOHBgH0xMTNCv34eYPHnaf36NR9I/kZERKhSx84SKj3HnzYk769f/DJlMOZJz+vSZBbbN1NQMX345E5MmfQoA8PHZpTUxaGam+3N
Q7TctLQ2ZmaUbJwDlehs7dvjgt9824cyZU8jMzERWlgzBwUEIDg7C9u2/w9a2CsaMGYuhQ0e8ETMGiEidkZERTM15HldWShKvGzRwEQZ/PHoUgdTUFFhb26itU6QqW5aXm5s7zpw5pVb6TH1to/ydkjt3bsNvv20CoPyOuLt7onnz5qhWzRHm5uZqyQRVguX1WK5JYWJ6XtqWWni1z1fb6PsaKW+HlJWVVZEHupTnNVVpjJAv7PmW+t8k/Y1IDJZ0ZpqqkgUAZGS8muFV2uep5ubmcHPzgJubBz75ZDy2b/8dGzb8AkA5k+L06ZNa15HU57mXSFT4fRVlW0NmZGRUqN850g/G4Dc/Bhf2N0ufvzFl9duWllb8JFneGARA7Vq/LPpL9NXPU1T6jN3licOkDczQoSMQHf0EZ8+eRljYA0ybNglr124scJ2horCwsMDSpf/DiBGDIJFIsGrVCnh6thQWfM1r3749wj/aHj16wdt7YYHlvW7fvlngMVWLpwIQOju10XcphIyMDHh7z1UbKXPu3BmcOnVCGEn3XyeTZeocKZr3s8/7NyktZmZmmDBhMsaPn4SHD8Nx+/YthITcQkDAdSQmJiIrKwv79+9FcHAQtmzZxhNaonLEuPNmxJ3s7Gxcv34NgPI9NG2qvXxTs2bNYWZmhszMTERGPkZ6ejosLCw0bpuZqftzAF59FmZmpR8nAMDR0Qne3gsxe/Y83L0bgpCQENy6FYzg4EBkZGQgOTkJa9b8gPDwMHz77Xdl0iYiojdVSeK1kZERmjd3g7//RSgUCgQGBqJr1/eEmQc1a9ZC1ar5R0WrOiVDQx8gPV0KCwtLtQ5KDw/1UohZWVnYunUzAGUsW7t2Y4GlrDMzS7cEXmH2n/ccQN/XSCEht4Xb2kojavM2XVMV9nxL/W+i+bymMAoqTVYceb8bnTt3xYoVq0qwr1d/o7I8TxWJRPj4408RFBSAa9euAgCuX7+mNTFIRK8wBhdNecfgt03e2LFy5Wp06NCp2PvKe60vk8l0Lr1S0jikr36eosr7nRox4iN8+eUMve27LHGojIFRrk+0DO+/3xOAcoHRadMmIjk5WS/7r1KlCsaNmwhA+UO9ceM6jdtdv648WRSLK2DmTC+ta/7ExsYW+Jyd3avp5jEx0TrbFx2te5ui+N//lgnHbdeug/CDt2LFUmHK/n9dYT6zvJ99WU6FFolEcHZugEGDhmDhwiXw9T2Bn35aLyQFIiIe4sCB/WXWHiLKj3HnzYg7qampwqg0CwsLnaU/RSIRLC1fDQrRNsvv6dOYfGsd5T9+ijCqrqxLZhgbG8PNzQNjxozFDz/8hGPHzmDOnG+E2RVHj/riwYN7ZdomIqI3TUnjdd6ZBUFBN/79v+a1jVRatFDOYMjNzRXWKVJ17tSsWStfvLhz57Yw6r9//0Fa17ct7WuxwpwDlOY10okTx4TbBX2+hfU2XFNJJBKkpqZq3UahUODpU+X3wtTUNN96isbGr84Nc3K0j+RPSUkpXkM1yPvdeP48voT7Kt/zVOWaXkpJSaVfOp7obcEYXDTlHYPfNnlLfMfHl10cyhuXi0tf/TxFlbf8bUljd3liYtAAVahQAQsWLEavXn0AAI8eRWDq1AlqdYBLYsCAwahaVVnS7PhxP0REPMy3TVJSEgBlDWRtdbOzsrLUprK/rn59Z6FT9Pbtm8Ii3AUJCLius/2FdezYERw/7gdAuTDw0qXfY9q0LwEoaxV/++1c5OQUrub2m0w1w6QgCQkJQh1nBweHYpVRNTJ61UGtq3NZG5FIhNat22DGjK+Fx27dCi72/ohIPxh39KMkcSfviLaUlBSd6xnIZDK8ePFCuG9lVXCprfR0Ke7e1b6ge95You0iUptXyczixwlAWcKsf/9BGDRoiPDYzZuMFUREJYnXeWcWBAUFqq1t5OamuVOyXr36wrpwQUGBamsbaSphporlgO51a/7557LONpfE48ePdK5ZdP36q3OA4sY
+TS5cOIewsAcAlINfevXqq7d9A4W/pnpVVq1kcVlfVJ2DBbl7946wplCjRq75ysJVqvTq/DAh4bnWfYWE3NLZnsKet9jYVEbduvUAAKGhD9S+50Xl6tpYuB0QcEPLlqpt9HeeCkCtpG1ZVYggelswBhdeecbgt1He5HFJ/3ZFiUP37t0t8Vp/+urnAdT7pnXFbmfnhsJA6sDAGzr7hd5UTAwaKLFYjG+//Q59+34AAIiMfIypUyfi+XPtJ8CFYWxsjJEjxwBQJnk2b96YbxtVDeEXL5K1/gjs2bMbqakpWo/17rvKRXKlUil8fQ8VuG1ExEOdFwuF9eTJE6xcuQIAYGJiisWLl8PU1BTDho1A+/YdASgvPDZt2qCX45WnAwf2ap3a7eOzUyil0qVLt2IdI+9Fgz5KDjg5vSPcLuyCyERUuhh3SqakccfCwgIODo4AlPXsz549pfV4p0+fFJKMzs4NdJYA2bVrR4HPyeVy7N79p3C/S5f3tO6rIKpYoa/SNOqxIlfLlkREhqO48bp+fWdhEMmjRxE4c+a08Jy7e/4ORkCZOGnRwg2AslMlb4eNptfkXYdG2yh0iUSCPXt2aW1vScnlcvj47CzweZksEwcO7BXuF/c66XXh4WFYsmShcP/DDweW2kwIXddUqtKihS0pXtp27/5T6yDTvOcqmv4edevWFW5rGxwbFRWJK1d0d5wW5byld29lcjc3NxebNxe/D8HR0QkNG7oUqp1XrlxGZORjrfsrapLy0qULwu169eoX6bVExBhcWOUVg99WLi6NhN/sK1f8cevWzWLvq1OnLsLtAwf2aR2QrK0PobD01c8DFK1vWiwWo0ePXgCUA6937y74+/gmY2LQgBkZGeGbbxagf/+BAIAnT6Iwdep4xMfHlXjfAwcOgo2NDQDViMZQteddXV0BKDtwN2zQXPbt5Mnj2LRpvc5jjRw5WhiNt27dzxpH7yUlJcHbe45eOv5evnyJ+fPnClPov/xyhtpJ77ffLhQuznbu3Kb3UXhlLS4uDosWLdA4+uH06ZPw8VEGfBMTUwwZMqxYx8h70Rkael/rtsuWLUZ4eJjWbQ4c2CfcdnZuWKw2EZH+Me4Uj77iTo8ePYXbq1f/UGD5zLt37+Cnn34Q7qs6q7Q5e/Y0du3KfzIsl8vx008/4N495YzCunXroV27Djr3p4mTkxMAZUeXtnVzQkMfYMuWX7WOrs3MzISf3xHhPmMFEdErxYnXRkZGcHNzF+7v3LkNAFCjRk2tiSvVa8LDw9QG02jqlGzUyFWIv4cP/62xYzI1NRVeXjN0ziTQBx+fXTh9+mS+x7Ozs7Fo0QLExSk/rzZt2qJ+fecSHSs9PR27du3E+PFjhdLcrq5NMG3a58Xanz6uqVTXcKmpKYiLe1asdujTvXt3sGbNKo3r/+3atRNnzyo7yitXtkWfPvnPbVq1agOxWDnj7a+/9iI6+km+bZ4/f465c78u1ODTwp63AMDgwUPh6Kjc/uDBA/jll5+0ljN9+fIlTp8+if379+R7btSoMcLtZcu+w5MnUfm2efIkCsuW6V5fec6cmfDymonr169pPad9+fIl1q5dg8BA5YwjsbiCUBKRiIqGMbhwyjIGv+1EIhGmTlWeTygUCnh5zShU9bjNmzfmO5dwdm6A1q3bAADi4p5h6dLvNMazvXt9cOaM9sHKhaHPfh5ra2thFmB4eKjOinYffzxOKEv+66/r4ePzp9Y1iDMzM3Ho0N84efK4zraUlQq6N6G3mUgkgpeXcp2d/fv3IiYmBlOmTMAvv/wqnMgWh6mpGYYPH4WNG9dBoVBg06YNWLVqjfD84MHD4Ot7GLm5Odi/fw9CQx+ga9f3YGdnj+TkZFy8eB4BAddhbm6ODh064dy5MwUeq3nzFhgyZDj27t2NjIx0TJ48AT179oKbmweMjY0RFhaKw4cPQiJJRefOXXH+/Nlivy8AWL9+LR48UCavOnfuioEDB6s9b21tg4ULl+D
zz6dALpdj4UJv7Ny5BzY2+RcNDgi4nm9qtaosDACcP382X7D94IP+aok0lbS0NPz553a1x/JeoD179jTf2lsNG7ronLnRpct7OHv2NMLDw9CnTz9Ur14DUmkaLl/2VxsROG3aFxrbVRgtW7bC3r27AQBLly7CsGEj4OT0jlDexc7OXgjkhw//jcOH/0atWrXh6dkSdevWh7W1NbKzsxEXF4ezZ0/h4cNwAICVlRUGDhyi+aBEVC4Yd4pOX3Fn9OixOHPmNGJioiGRpGL8+LHo0uU9eHh4wsrKGhJJKgICbuDcubNCh5eraxMMGjRUa/saNGiI9PR0/Pzzj7h06QK6dn0PlSvbIj4+HidOHFMrd+btvTBf6a7CatmyNR4+DEdmZiZmzZqO3r37wsamMlSVuurVc4a9vT2kUim2bPkVW7duRtOmzdC0aXPUqlULFhaWSEtLQ1RUJE6dOi5csDZp0hSeni21HJmIyPAUJ167u3viwoVzAJQjt4GCS5ipqJ6Xy+XCum116tRFlSpV8m1rZ2eHrl274cyZU5BKpRgzZgQ++KA/6tdvALFYjLCwUPj5HUFqagr69OmHo0d9i/v2dXJ398TDh+Hw9p6DEyeOoV279rC0rISYmGgcPeorJJWsrKzh5TVP5/6ioiKFzw4AcnJyIJVKkZiYgHv37iI4OAgZGenC856eLbFo0XK1GRxFoY9rqpYtWwvXg15eMzFgwGDY29sLHcfVq9dAjRo1i9W+orKzs0O1ag7Ys2c3goOD0aNHL1SrVg0vXiTj7NkzCA5WzoQRiUSYO9cbFhaW+fZRpUpV9O7dB76+hyCVSjFu3BgMGDAY9es74+XLl7h37y78/I4gK0uGbt3e19ghnVdhz1sA5XnsypWrMWXKeKSlpWHnzm04ccIPXbp0g7OzMywsLCGTyRAfH4fQ0Ae4ceM60tOl6Nevf77jvv9+T5w6dQKXLl1AYmIixowZgb59PxRK6d29G4IjRw5DJpOhU6cuat+718nlcly4cA4XLpyDrW0VuLt7wMWlEapUqQITEzNIpWmIiAjH+fPn1JIW48ZNRK1atbV+PkRUMMZg7fQdgwlo164DJk6cgk2bNiAlJQVffDEFLVq4oU2btnB0dEKFChWQlpaGJ0+iEBJyC3fuhEChUKitLavi5fUNPv54JNLS0nDy5HGEhj5Anz4fwMnJCampqTh37gwCAq7DyekdWFpa5hvUXRT67OcBAE/PVv/2xcfgm2+80LlzV1haVhJit6trE1hbK2fn2tvbY8mS7/H119ORnZ2NNWt+wF9/7UOnTl1Qp05dmJmZIyMjHc+ePcX9+/cRGHgDWVlZmDhxarHfr74xMUgQiUSYNWsOKlSoAB+fXYiNfYqpU8dj3bpNeOed6sXe75Ahw/Dnn9uRlpYGf/+LuH//Hho1Umby69d3xpw587BixTLk5uYgJORWvhkX1tY2WLRoKW7fvqXzH+706TORmZkJX9+DyM3NwdGjvvmC0LBhI9CxY5cSddBeuXIZPj7KcmgODg6YN2++xu08PFpizJhP8McfvyExMRGLFy/EDz/8lG+74OAg/PHHbwUez9//Ivz9L6o91rJl6wITg9r2FRcXl+/53r376UwMensvQFqaBAEBN/IlFgHlyKQJEyZj6NDhWvejTdu27eHm5oHg4EDExETjhx/+l6+d8+erj2iMiopEVFRkgft0cHDA8uWr1BaEJaI3A+NO4ekz7lhaVsIvv2zEt9/OQ0jILeTk5ODUqRM4deqExn22bdseCxYs0llG1MLCEgsWLMasWdMRHBwodL6pH9sSS5asUFtzoKhGjhyNEyeOITk56d+BNeozI729F6Jv3w+EDkm5XI5bt25qLYXi5uaOZctWFjtZSUT0NitqvNa0JlHedWs0cXZuAEtLS0ilr8o/adqPipfXN4iJiUZo6ANkZGQI1Uvy6tq1G2bPnleqnZIODo4YP34S5s79GpcuXVAbMKlStWpVrFr1kzA
TTJvTp0/qTDQBQK1atTFixCh8+OHAPGvYFV9Jrqn69fsQf/21F1FRkQgNfYAVK5aoPT9u3ERMmDC5xG0sDLG4ApYtW4mZM79AWNgDtcG2KsbGxpg9ex46duxc4H6+/HIGHj2KwN27dyCRSLBt21a1501MTOHtvQC5uXKdf6/Cnreo1K/vjN9/34kFC77B3bt3kJCQIAye1UQkEsHeXvNMoMWLl2PevNm4csUfMpkM+/fvUZtdKBaL8fnnX8Ha2lprYrBu3fp48OA+cnNzkZycpPN7amFhiSlTpmHw4OJVEiKiVxiDC6bvGExKn346AQ4OjlizZhUkEglu3gzGzZv51xhWMTe3EGbY5eXk9A7WrFmHWbOm48WLZERFRWL9+p/VtqlWzQHff/8DfvxxZYnarO9+nnHjJuLq1SuQyWQ4e/a0UG1AZd26TWr/Rlq3boNff92KhQu9ERUViejoJ8KMXU3EYrHGxHt5YWKQBNOnz4JYXAF//rkdcXFxwmiUmjWLN8rPwsISQ4YMx9atmwEAmzZtwOrVa4Xn+/XrD2fnhti9eyeCg4OQnJwEc3MLODg4oH37jhgwYDDs7Oxw+7buhb2VU+3no2vX93DgwH7cuRMCqTQNlSvbolEjVwwYMAht2rQVSlsUR1JSIhYvng+FQgGxWIyFC5dqXdh0/PhJCAwMQEjILVy+fAl79uzCsGEji3388mJhYYmff96Ao0cP49gxPzx+/AhSaZowYnDYsBFwcXEt0THEYjF++mkd9u3bgwsXziEy8jHS09M1lmjx9T2Ba9eu4Natm3j4MByxsbGQSqUQi41gY1MZ9es7o2PHTujZsw9MTU1L1C4iKl2MO9qVRtxxcHDEr7/+hqtXr+D06ZO4e/cOEhMTIZNlwtTUDPb29mjSpCl69uyttoi9LvXq1cf27buxb98enD9/Fs+ePUV29ks4ODigXbsOGDnyI1StWrI1kOzs7LBt2y7s2rUDN25cQ2xsLDIzM/KV+HB398Cff+7F9etXcedOCB49isDz588hk2XC2NgYdnb2cHFxRffu76NDh04lahMRkSEobLyuV68+bGxshFkHQMFrG6kYGRmheXM3XL58qVCvsbKywqZNv+PAgX04deokIiMfIyfnJWxtbeHi4opevfqorW9TmtzdPbBjhw/27fOBv/9FYcaUk9M76Ny5K4YPHwlLy0rF2repqSksLCxRqVIl1KlTFw0busDDwxNNmzbXS9v1cU1lZmaG337bht27d+LKlcuIjn6CjIwMrWW0SpO9vT22bNmGgwf/wqlTJxAdHY3MzAzY2dmjVas2GDHiI53nmJaWlbBhwxb8/fd+nDx5HJGRkcjJeQk7Ozu0bv0uhg4dgVq1auPIkcM621PY85a8qlevgd9+245r1/7BmTOnERJyC4mJCcjIyICJiSns7OxQp05duLl5oEOHjgVW7DE1NcWPP/6MkyePw9f3EMLCQpGZmQFb2ypo0cINgwcPQ5MmTXW+j2++mY/PPvsC169fw61bwXj4MBxPn8ZAIklDTs5LmJmZwda2CurVq49WrVqja9fuwkwKItIPxmDNSjMGG7LevfuiU6cu8PPzxdWr/+Dhw3CkpKQgNzcHlpaWcHKqjoYNXdCyZSu0bduuwMoFjRs3wZ49B7Br1w5cvHgesbFPIRZXgKOjIzp37oohQ4bB2tpGL23WZz+Ps3MDbN++W9hXfHwcZDKZ1tjdqJErdu/ej/Pnz+LixfO4e/cOkpOTIZNlwszMDNWqOaBevfpwd/dAhw6dUKVKVb28b30QKXQVTC2C8PBwZGe/hEhkBEfHGvraLRERERERERFRuYmNjcXAgcq16TRVNCEiIqLSwRhMhu7Zs2goFHIYG1eEs7N+1s1k3SQiIiIiIiIiIiIiIiIiA8DEIBEREREREREREREREZEBYGKQiIiIiIiIiIiIiIiIyAAwMUhERERERERERERERERkAJgYJCIiIiIiIiIiIiIiIjIAIoV
CodDXzsLDw5Gd/RIikREcHWvoa7dEREREREREREREREREBuXZs2goFHIYG1eEs7OzXvbJGYNEREREREREREREREREBoCJQSIiIiIiIiIiIiIiIiIDwMQgERERERERERERERERkQFgYpCIiIiIiIiIiIiIiIjIADAxSERERERERERERERERGQA9JoYFIvFAAC5XA6FQqHPXRMREREREREREREREREZBIVCAblcDuBV/k0f9JoYNDY2hkikbGxWlkyfuyYiIiIiIiIiIiIiIiIyCFlZMigUCohEyvybvug1MWhlZQWRSASRCJBKJZw1SERERERERERERERERFQECoUCUqkEIhEgEolgZWWlt33rNTFoaWkJkUgEIyMRZLJMJCU9h0yWyQQhERERERERERERERERkRYKhUItv2ZkJIJIJIKlpaXejiFS6Dlrl5aWhqdPn0Iul0MuV0ChwL/JQr3mIImIiIiIiIiIiIiIiIjeGnK5XCgfamSkzK298847qFSpkt6OoffEIPAqOahQKP79T99HICIiIiIiIiIiIiIiInq7qMqHikQivScFgVJKDALKrKZUKoVEIkF2djZyc3NL4zBERERERERERERERERE/3lisRjGxsawsrKCpaVlqVTjLLXEIBERERERERERERERERG9ObjwHxEREREREREREREREZEBYGKQiIiIiIiIiIiIiIiIyAAwMUhERERERERERERERERkAJgYJCIiIiIiIiIiIiIiIjIATAwSERERERERERERERERGQAmBomIiIiIiIiIiIiIiIgMABODRERERERERERERERERAaAiUEiIiIiIiIiIiIiIiIiA8DEIBEREREREREREREREZEBYGKQiIiIiIiIiIiIiIiIyAAwMUhERERERERERERERERkAJgYJCIiIiIiIiIiIiIiIjIATAwSERERERERERERERERGQAmBomIiIiIiIiIiIiIiIgMABODRERERERERERERERERAaAiUEiIiIiIiIiIiIiIiIiA8DEIBEREREREREREREREZEB+D+FXaMOMU+wIQAAAABJRU5ErkJggg==",
+      "text/plain": [
+       "<Figure size 1280x960 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "generate_k_plot(df, 2**20, 256)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba761181-2f7d-4890-8819-f12a8483cb17",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/cpp/scripts/heuristics/select_k/select_k_dataset.py b/cpp/scripts/heuristics/select_k/select_k_dataset.py
new file mode 100644
index 0000000000..6d174febe5
--- /dev/null
+++ b/cpp/scripts/heuristics/select_k/select_k_dataset.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from collections import defaultdict
+import json
+
+import pandas as pd
+import numpy as np
+
+
+def load_dataframe(filename):
+    """Loads up the select_k benchmark times as a pandas dataframe
+
+    This loads up the timings from the MATRIX_BENCH script into a pandas dataframe
+    This file is generated by running:
+
+    ./cpp/build/MATRIX_BENCH --benchmark_filter=SelectKDataset \
+        --benchmark_out_format=json \
+        --benchmark_out=select_k_times.json \
+        --select_k_dataset
+
+    Note running these MATRIX_BENCH tests takes over 24 hours right now
+
+    `filename` is the path of that JSON file. Returns a dataframe with one row
+    per benchmark entry: key_type, index_type and algo (strings) plus row, col,
+    k, use_index_input and use_memory_pool (ints) are parsed from the
+    "/"-separated run_name, and time is real_time / 1000. Rows are sorted by
+    k, row, col and the remaining input columns, and the index is reset.
+    """
+    # a with-block closes the file even if the JSON is malformed
+    with open(filename) as f:
+        benchmarks = json.load(f)["benchmarks"]
+    df = pd.DataFrame(benchmarks, columns=["real_time", "run_name"])
+    # run_name fields 1-3 stay strings, fields 4-8 are converted to int
+    run_info = [
+        run[1:4] + list(map(int, run[4:9]))
+        for run in df.run_name.str.split("/").tolist()
+    ]
+    df[
+        [
+            "key_type",
+            "index_type",
+            "algo",
+            "row",
+            "col",
+            "k",
+            "use_index_input",
+            "use_memory_pool",
+        ]
+    ] = pd.DataFrame(run_info, index=df.index)
+    df["time"] = df["real_time"] / 1000
+    df = df.drop(["run_name", "real_time"], axis=1)
+    sort_keys = ["k", "row", "col", "key_type", "index_type"]
+    sort_keys += ["use_index_input", "use_memory_pool"]
+    df = df.sort_values(by=sort_keys)
+    return df.reset_index(drop=True)
+
+
+def get_dataset(df):
+    """Turns the benchmark dataframe into training data
+
+    Returns three numpy arrays: the features (k, row, col, use_memory_pool),
+    the labels (the fastest algo for each set of features) and the sample
+    weight to give each of those examples.
+    """
+    # bucket every (algo, time) measurement under the inputs it was run with
+    timings_by_config = defaultdict(list)
+    for record in df.itertuples():
+        config = (
+            record.k,
+            record.row,
+            record.col,
+            record.use_memory_pool,
+            record.key_type,
+            record.index_type,
+        )
+        timings_by_config[config].append((record.algo, record.time))
+
+    # each bucket becomes one training example: its features, the winning
+    # algo as the label, and a weight for how clear-cut the win was
+    features, labels, sample_weights = [], [], []
+    for config, timings in timings_by_config.items():
+        # fastest measurement first
+        ranked = sorted(timings, key=lambda entry: entry[1])
+        winner, winner_time = ranked[0]
+
+        # when nothing else handles this K value there is no runner-up, so
+        # treat the runner-up as infinitely slow
+        runner_up_time = ranked[1][1] if len(ranked) > 1 else np.inf
+
+        # weight by the absolute time saved over the runner-up, capped at 10:
+        # the big wins are what training should capture, whereas marginal
+        # gaps might just be noise (especially for the faster runs)
+        # (relative alternative: min((runner_up_time / winner_time) - 1, 10))
+        gain = min((runner_up_time - winner_time), 10)
+
+        # the trailing key_type / index_type entries are dropped, since the
+        # dtype values can't be handled in training yet
+        features.append(config[:-2])
+        labels.append(winner)
+        sample_weights.append(gain)
+
+    return np.array(features), np.array(labels), np.array(sample_weights)

From 6fdb0413cc3361c36e327f9a2ada4c6a3bc4bf10 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Wed, 17 May 2023 16:22:25 -0700
Subject: [PATCH 61/78] use `matrix::select_k` in brute_force::knn call (#1463)

Authors:
  - Ben Frederickson (https://github.com/benfred)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1463
---
 cpp/CMakeLists.txt                            |  9 ++--
 .../raft/matrix/detail/select_k-ext.cuh       |  2 +
 .../raft/neighbors/detail/knn_brute_force.cuh | 42 ++++++++++---------
 cpp/src/matrix/detail/select_k_float_int32.cu | 33 +++++++++++++++
 4 files changed, 60 insertions(+), 26 deletions(-)
 create mode 100644 cpp/src/matrix/detail/select_k_float_int32.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5fe02ec794..68ff4f3bb6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -298,6 +298,7 @@ if(RAFT_COMPILE_LIBRARY)
     src/matrix/detail/select_k_double_uint32_t.cu
     src/matrix/detail/select_k_float_int64_t.cu
     src/matrix/detail/select_k_float_uint32_t.cu
+    src/matrix/detail/select_k_float_int32.cu
     src/matrix/detail/select_k_half_int64_t.cu
     src/matrix/detail/select_k_half_uint32_t.cu
     src/neighbors/ball_cover.cu
@@ -600,9 +601,7 @@ target_link_libraries(raft::raft INTERFACE
 # Use `rapids_export` for 22.04 as it will have COMPONENT support
 rapids_export(
   INSTALL raft
-  EXPORT_SET raft-exports
-  COMPONENTS ${raft_components}
-  COMPONENTS_EXPORT_SET ${raft_export_sets}
+  EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets}
   GLOBAL_TARGETS raft compiled distributed
   NAMESPACE raft::
   DOCUMENTATION doc_string
@@ -613,9 +612,7 @@ rapids_export(
 # * build export -------------------------------------------------------------
 rapids_export(
   BUILD raft
-  EXPORT_SET raft-exports
-  COMPONENTS ${raft_components}
-  COMPONENTS_EXPORT_SET ${raft_export_sets}
+  EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets}
   GLOBAL_TARGETS raft compiled distributed
   DOCUMENTATION doc_string
   NAMESPACE raft::
diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh
index 2b233c156d..e05c8882fe 100644
--- a/cpp/include/raft/matrix/detail/select_k-ext.cuh
+++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh
@@ -57,6 +57,8 @@ instantiate_raft_matrix_detail_select_k(__half, uint32_t);
 instantiate_raft_matrix_detail_select_k(__half, int64_t);
 instantiate_raft_matrix_detail_select_k(float, int64_t);
 instantiate_raft_matrix_detail_select_k(float, uint32_t);
+// needed for brute force knn
+instantiate_raft_matrix_detail_select_k(float, int);
 // We did not have these two for double before, but there are tests for them. We
 // therefore include them here.
 instantiate_raft_matrix_detail_select_k(double, int64_t);
diff --git a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
index 6cb77bac94..5cb9f6d0ab 100644
--- a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
+++ b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh
@@ -34,10 +34,9 @@
 #include <raft/linalg/map.cuh>
 #include <raft/linalg/transpose.cuh>
 #include <raft/matrix/init.cuh>
+#include <raft/matrix/select_k.cuh>
 #include <raft/neighbors/detail/faiss_select/DistanceUtils.h>
-#include <raft/neighbors/detail/faiss_select/Select.cuh>
 #include <raft/neighbors/detail/knn_merge_parts.cuh>
-#include <raft/neighbors/detail/selection_faiss.cuh>
 #include <raft/spatial/knn/detail/fused_l2_knn.cuh>
 #include <raft/spatial/knn/detail/haversine_distance.cuh>
 #include <raft/spatial/knn/detail/processing.cuh>
@@ -230,15 +229,16 @@ void tiled_brute_force_knn(const raft::resources& handle,
         }
       }
 
-      select_k<IndexType, ElementType>(temp_distances.data(),
-                                       nullptr,
-                                       current_query_size,
-                                       current_centroid_size,
-                                       distances + i * k,
-                                       indices + i * k,
-                                       select_min,
-                                       current_k,
-                                       stream);
+      matrix::select_k<ElementType, IndexType>(
+        handle,
+        raft::make_device_matrix_view<const ElementType, int64_t, row_major>(
+          temp_distances.data(), current_query_size, current_centroid_size),
+        std::nullopt,
+        raft::make_device_matrix_view<ElementType, int64_t, row_major>(
+          distances + i * k, current_query_size, current_k),
+        raft::make_device_matrix_view<IndexType, int64_t, row_major>(
+          indices + i * k, current_query_size, current_k),
+        select_min);
 
       // if we're tiling over columns, we need to do a couple things to fix up
       // the output of select_k
@@ -270,15 +270,17 @@ void tiled_brute_force_knn(const raft::resources& handle,
 
     if (tile_cols != n) {
       // select the actual top-k items here from the temporary output
-      select_k<IndexType, ElementType>(temp_out_distances.data(),
-                                       temp_out_indices.data(),
-                                       current_query_size,
-                                       temp_out_cols,
-                                       distances + i * k,
-                                       indices + i * k,
-                                       select_min,
-                                       k,
-                                       stream);
+      matrix::select_k<ElementType, IndexType>(
+        handle,
+        raft::make_device_matrix_view<const ElementType, int64_t, row_major>(
+          temp_out_distances.data(), current_query_size, temp_out_cols),
+        raft::make_device_matrix_view<const IndexType, int64_t, row_major>(
+          temp_out_indices.data(), current_query_size, temp_out_cols),
+        raft::make_device_matrix_view<ElementType, int64_t, row_major>(
+          distances + i * k, current_query_size, k),
+        raft::make_device_matrix_view<IndexType, int64_t, row_major>(
+          indices + i * k, current_query_size, k),
+        select_min);
     }
   }
 }
diff --git a/cpp/src/matrix/detail/select_k_float_int32.cu b/cpp/src/matrix/detail/select_k_float_int32.cu
new file mode 100644
index 0000000000..42094bbb67
--- /dev/null
+++ b/cpp/src/matrix/detail/select_k_float_int32.cu
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <raft/matrix/detail/select_k-inl.cuh>
+
+#define instantiate_raft_matrix_detail_select_k(T, IdxT)                     \
+  template void raft::matrix::detail::select_k(const T* in_val,              \
+                                               const IdxT* in_idx,           \
+                                               size_t batch_size,            \
+                                               size_t len,                   \
+                                               int k,                        \
+                                               T* out_val,                   \
+                                               IdxT* out_idx,                \
+                                               bool select_min,              \
+                                               rmm::cuda_stream_view stream, \
+                                               rmm::mr::device_memory_resource* mr)
+
+instantiate_raft_matrix_detail_select_k(float, int);
+
+#undef instantiate_raft_matrix_detail_select_k

From 8e412b4b20f140e4f86ffbe7544df084b1a5731e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Akif=20=C3=87=C3=96RD=C3=9CK?= <akifcorduk@gmail.com>
Date: Thu, 18 May 2023 03:20:12 +0200
Subject: [PATCH 62/78] Add generic reduction functions and separate
 reductions/warp_primitives (#1470)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds a bunch of new device reduction functions, such as:

- Generic device reductions that take a reduction operator as an argument.
- Ranked reductions to return the index/rank of the reduced value.
- Weighted random reduction to have probabilistic reduction using conditional probability.
- Binary reduction to reduce binary values more efficiently.

There are tests implemented for all device reduction operations.

This PR also separates warp primitives to the `warp_primitives.cuh`.
All reduction functions are moved to `reduction.cuh`

Authors:
  - Akif ÇÖRDÜK (https://github.com/akifcorduk)
  - Corey J. Nolet (https://github.com/cjnolet)
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1470
---
 cpp/include/raft/random/device/sample.cuh     | 104 ++++++
 cpp/include/raft/util/cuda_utils.cuh          | 312 +-----------------
 cpp/include/raft/util/device_loads_stores.cuh |  57 ++++
 cpp/include/raft/util/pow2_utils.cuh          |  11 +-
 cpp/include/raft/util/reduction.cuh           | 202 ++++++++++++
 cpp/include/raft/util/warp_primitives.cuh     | 259 +++++++++++++++
 cpp/test/CMakeLists.txt                       |  12 +-
 cpp/test/util/reduction.cu                    | 196 +++++++++++
 8 files changed, 841 insertions(+), 312 deletions(-)
 create mode 100644 cpp/include/raft/random/device/sample.cuh
 create mode 100644 cpp/include/raft/util/reduction.cuh
 create mode 100644 cpp/include/raft/util/warp_primitives.cuh
 create mode 100644 cpp/test/util/reduction.cu

diff --git a/cpp/include/raft/random/device/sample.cuh b/cpp/include/raft/random/device/sample.cuh
new file mode 100644
index 0000000000..f08db3e0a2
--- /dev/null
+++ b/cpp/include/raft/random/device/sample.cuh
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <raft/core/cudart_utils.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/util/cuda_dev_essentials.cuh>
+#include <raft/util/warp_primitives.cuh>
+
+namespace raft::random::device {
+
+/**
+ * @brief warp-level random sampling of an index.
+ * It selects an index with the given discrete probability
+ * distribution(represented by weights of each index)
+ * @param rng random number generator, must have next_u32() function
+ * @param[inout] weight weight of the rank/index; overwritten with the accumulated weight.
+ * @param[inout] idx index to be used as rank; overwritten with the sampled index.
+ * @note nothing is returned; only the thread0 will contain valid `weight` and `idx` results
+ */
+template <typename T, typename rng_t, typename i_t = int>
+DI void warp_random_sample(rng_t& rng, T& weight, i_t& idx)
+{
+  // Todo(#1491): benchmark whether a scan and then selecting within the ranges is more efficient.
+  static_assert(std::is_integral<T>::value, "The type T must be an integral type.");
+#pragma unroll
+  for (i_t offset = raft::WarpSize / 2; offset > 0; offset /= 2) {
+    T tmp_weight = shfl(weight, laneId() + offset);
+    i_t tmp_idx  = shfl(idx, laneId() + offset);
+    T sum        = (tmp_weight + weight);
+    weight       = sum;
+    if (sum != 0) {
+      i_t rnd_number = (rng.next_u32() % sum);
+      if (rnd_number < tmp_weight) { idx = tmp_idx; }
+    }
+  }
+}
+
+/**
+ * @brief 1-D block-level random sampling of an index.
+ * It selects an index with the given discrete probability
+ * distribution(represented by weights of each index)
+ *
+ * Let w_i be the weight stored on thread i.  We calculate the cumulative distribution function
+ * F_i = sum_{k=0..i} w_k.
+ * Sequentially, we could select one of the elements with the desired probability using the
+ * following method. We can consider that each element has a subinterval assigned: [F_{i-1}, F_i).
+ * We draw a uniform random number from [0, F_{n-1}) and check which subinterval it falls into.
+ * We return idx corresponding to the selected subinterval.
+ * In parallel, we do a tree reduction and make a selection at every step when we combine two
+ * values.
+ * @param rng random number generator, must have next_u32() function
+ * @param shbuf shared memory region needed for storing intermediate results. It
+ *             must at least be of size: `(sizeof(T) + sizeof(i_t)) * WarpSize`
+ * @param weight weight of the rank/index.
+ * @param idx index to be used as rank
+ * @return only the thread0 will contain valid reduced result
+ */
+template <typename T, typename rng_t, typename i_t = int>
+DI i_t block_random_sample(rng_t rng, T* shbuf, T weight = 1, i_t idx = threadIdx.x)
+{
+  T* values    = shbuf;
+  i_t* indices = (i_t*)&shbuf[WarpSize];
+  i_t wid      = threadIdx.x / WarpSize;
+  i_t nWarps   = (blockDim.x + WarpSize - 1) / WarpSize;
+  warp_random_sample(rng, weight, idx);  // Each warp performs partial reduction
+  i_t lane = laneId();
+  if (lane == 0) {
+    values[wid]  = weight;  // Write reduced value to shared memory
+    indices[wid] = idx;     // Write reduced value to shared memory
+  }
+
+  __syncthreads();  // Wait for all partial reductions
+
+  // read from shared memory only if that warp existed
+  if (lane < nWarps) {
+    weight = values[lane];
+    idx    = indices[lane];
+  } else {
+    weight = 0;
+    idx    = -1;
+  }
+  __syncthreads();
+  if (wid == 0) warp_random_sample(rng, weight, idx);
+  return idx;
+}
+
+}  // namespace raft::random::device
\ No newline at end of file
diff --git a/cpp/include/raft/util/cuda_utils.cuh b/cpp/include/raft/util/cuda_utils.cuh
index 687a6b4651..0523dcc81c 100644
--- a/cpp/include/raft/util/cuda_utils.cuh
+++ b/cpp/include/raft/util/cuda_utils.cuh
@@ -23,7 +23,10 @@
 #include <raft/core/cudart_utils.hpp>
 #include <raft/core/math.hpp>
 #include <raft/core/operators.hpp>
+// For backward compatibility, we include the following headers. They contain
+// functionality that was previously contained in cuda_utils.cuh
 #include <raft/util/cuda_dev_essentials.cuh>
+#include <raft/util/reduction.cuh>
 
 namespace raft {
 
@@ -523,238 +526,6 @@ DI double maxPrim(double x, double y)
 }
 /** @} */
 
-/** apply a warp-wide fence (useful from Volta+ archs) */
-DI void warpFence()
-{
-#if __CUDA_ARCH__ >= 700
-  __syncwarp();
-#endif
-}
-
-/** warp-wide any boolean aggregator */
-DI bool any(bool inFlag, uint32_t mask = 0xffffffffu)
-{
-#if CUDART_VERSION >= 9000
-  inFlag = __any_sync(mask, inFlag);
-#else
-  inFlag = __any(inFlag);
-#endif
-  return inFlag;
-}
-
-/** warp-wide all boolean aggregator */
-DI bool all(bool inFlag, uint32_t mask = 0xffffffffu)
-{
-#if CUDART_VERSION >= 9000
-  inFlag = __all_sync(mask, inFlag);
-#else
-  inFlag = __all(inFlag);
-#endif
-  return inFlag;
-}
-
-/** For every thread in the warp, set the corresponding bit to the thread's flag value.  */
-DI uint32_t ballot(bool inFlag, uint32_t mask = 0xffffffffu)
-{
-#if CUDART_VERSION >= 9000
-  return __ballot_sync(mask, inFlag);
-#else
-  return __ballot(inFlag);
-#endif
-}
-
-/** True CUDA alignment of a type (adapted from CUB) */
-template <typename T>
-struct cuda_alignment {
-  struct Pad {
-    T val;
-    char byte;
-  };
-
-  static constexpr int bytes = sizeof(Pad) - sizeof(T);
-};
-
-template <typename LargeT, typename UnitT>
-struct is_multiple {
-  static constexpr int large_align_bytes = cuda_alignment<LargeT>::bytes;
-  static constexpr int unit_align_bytes  = cuda_alignment<UnitT>::bytes;
-  static constexpr bool value =
-    (sizeof(LargeT) % sizeof(UnitT) == 0) && (large_align_bytes % unit_align_bytes == 0);
-};
-
-template <typename LargeT, typename UnitT>
-inline constexpr bool is_multiple_v = is_multiple<LargeT, UnitT>::value;
-
-template <typename T>
-struct is_shuffleable {
-  static constexpr bool value =
-    std::is_same_v<T, int> || std::is_same_v<T, unsigned int> || std::is_same_v<T, long> ||
-    std::is_same_v<T, unsigned long> || std::is_same_v<T, long long> ||
-    std::is_same_v<T, unsigned long long> || std::is_same_v<T, float> || std::is_same_v<T, double>;
-};
-
-template <typename T>
-inline constexpr bool is_shuffleable_v = is_shuffleable<T>::value;
-
-/**
- * @brief Shuffle the data inside a warp
- * @tparam T the data type
- * @param val value to be shuffled
- * @param srcLane lane from where to shuffle
- * @param width lane width
- * @param mask mask of participating threads (Volta+)
- * @return the shuffled data
- */
-template <typename T>
-DI std::enable_if_t<is_shuffleable_v<T>, T> shfl(T val,
-                                                 int srcLane,
-                                                 int width     = WarpSize,
-                                                 uint32_t mask = 0xffffffffu)
-{
-#if CUDART_VERSION >= 9000
-  return __shfl_sync(mask, val, srcLane, width);
-#else
-  return __shfl(val, srcLane, width);
-#endif
-}
-
-/// Overload of shfl for data types not supported by the CUDA intrinsics
-template <typename T>
-DI std::enable_if_t<!is_shuffleable_v<T>, T> shfl(T val,
-                                                  int srcLane,
-                                                  int width     = WarpSize,
-                                                  uint32_t mask = 0xffffffffu)
-{
-  using UnitT =
-    std::conditional_t<is_multiple_v<T, int>,
-                       unsigned int,
-                       std::conditional_t<is_multiple_v<T, short>, unsigned short, unsigned char>>;
-
-  constexpr int n_words = sizeof(T) / sizeof(UnitT);
-
-  T output;
-  UnitT* output_alias = reinterpret_cast<UnitT*>(&output);
-  UnitT* input_alias  = reinterpret_cast<UnitT*>(&val);
-
-  unsigned int shuffle_word;
-  shuffle_word    = shfl((unsigned int)input_alias[0], srcLane, width, mask);
-  output_alias[0] = shuffle_word;
-
-#pragma unroll
-  for (int i = 1; i < n_words; ++i) {
-    shuffle_word    = shfl((unsigned int)input_alias[i], srcLane, width, mask);
-    output_alias[i] = shuffle_word;
-  }
-
-  return output;
-}
-
-/**
- * @brief Shuffle the data inside a warp from lower lane IDs
- * @tparam T the data type
- * @param val value to be shuffled
- * @param delta lower lane ID delta from where to shuffle
- * @param width lane width
- * @param mask mask of participating threads (Volta+)
- * @return the shuffled data
- */
-template <typename T>
-DI std::enable_if_t<is_shuffleable_v<T>, T> shfl_up(T val,
-                                                    int delta,
-                                                    int width     = WarpSize,
-                                                    uint32_t mask = 0xffffffffu)
-{
-#if CUDART_VERSION >= 9000
-  return __shfl_up_sync(mask, val, delta, width);
-#else
-  return __shfl_up(val, delta, width);
-#endif
-}
-
-/// Overload of shfl_up for data types not supported by the CUDA intrinsics
-template <typename T>
-DI std::enable_if_t<!is_shuffleable_v<T>, T> shfl_up(T val,
-                                                     int delta,
-                                                     int width     = WarpSize,
-                                                     uint32_t mask = 0xffffffffu)
-{
-  using UnitT =
-    std::conditional_t<is_multiple_v<T, int>,
-                       unsigned int,
-                       std::conditional_t<is_multiple_v<T, short>, unsigned short, unsigned char>>;
-
-  constexpr int n_words = sizeof(T) / sizeof(UnitT);
-
-  T output;
-  UnitT* output_alias = reinterpret_cast<UnitT*>(&output);
-  UnitT* input_alias  = reinterpret_cast<UnitT*>(&val);
-
-  unsigned int shuffle_word;
-  shuffle_word    = shfl_up((unsigned int)input_alias[0], delta, width, mask);
-  output_alias[0] = shuffle_word;
-
-#pragma unroll
-  for (int i = 1; i < n_words; ++i) {
-    shuffle_word    = shfl_up((unsigned int)input_alias[i], delta, width, mask);
-    output_alias[i] = shuffle_word;
-  }
-
-  return output;
-}
-
-/**
- * @brief Shuffle the data inside a warp
- * @tparam T the data type
- * @param val value to be shuffled
- * @param laneMask mask to be applied in order to perform xor shuffle
- * @param width lane width
- * @param mask mask of participating threads (Volta+)
- * @return the shuffled data
- */
-template <typename T>
-DI std::enable_if_t<is_shuffleable_v<T>, T> shfl_xor(T val,
-                                                     int laneMask,
-                                                     int width     = WarpSize,
-                                                     uint32_t mask = 0xffffffffu)
-{
-#if CUDART_VERSION >= 9000
-  return __shfl_xor_sync(mask, val, laneMask, width);
-#else
-  return __shfl_xor(val, laneMask, width);
-#endif
-}
-
-/// Overload of shfl_xor for data types not supported by the CUDA intrinsics
-template <typename T>
-DI std::enable_if_t<!is_shuffleable_v<T>, T> shfl_xor(T val,
-                                                      int laneMask,
-                                                      int width     = WarpSize,
-                                                      uint32_t mask = 0xffffffffu)
-{
-  using UnitT =
-    std::conditional_t<is_multiple_v<T, int>,
-                       unsigned int,
-                       std::conditional_t<is_multiple_v<T, short>, unsigned short, unsigned char>>;
-
-  constexpr int n_words = sizeof(T) / sizeof(UnitT);
-
-  T output;
-  UnitT* output_alias = reinterpret_cast<UnitT*>(&output);
-  UnitT* input_alias  = reinterpret_cast<UnitT*>(&val);
-
-  unsigned int shuffle_word;
-  shuffle_word    = shfl_xor((unsigned int)input_alias[0], laneMask, width, mask);
-  output_alias[0] = shuffle_word;
-
-#pragma unroll
-  for (int i = 1; i < n_words; ++i) {
-    shuffle_word    = shfl_xor((unsigned int)input_alias[i], laneMask, width, mask);
-    output_alias[i] = shuffle_word;
-  }
-
-  return output;
-}
-
 /**
  * @brief Four-way byte dot product-accumulate.
  * @tparam T Four-byte integer: int or unsigned int
@@ -816,83 +587,6 @@ DI auto dp4a(unsigned int a, unsigned int b, unsigned int c) -> unsigned int
 #endif
 }
 
-/**
- * @brief Logical-warp-level reduction
- * @tparam logicalWarpSize Logical warp size (2, 4, 8, 16 or 32)
- * @tparam T Value type to be reduced
- * @tparam ReduceLambda Reduction operation type
- * @param val input value
- * @param reduce_op Reduction operation
- * @return Reduction result. All lanes will have the valid result.
- */
-template <int logicalWarpSize, typename T, typename ReduceLambda>
-DI T logicalWarpReduce(T val, ReduceLambda reduce_op)
-{
-#pragma unroll
-  for (int i = logicalWarpSize / 2; i > 0; i >>= 1) {
-    T tmp = shfl_xor(val, i);
-    val   = reduce_op(val, tmp);
-  }
-  return val;
-}
-
-/**
- * @brief Warp-level reduction
- * @tparam T Value type to be reduced
- * @tparam ReduceLambda Reduction operation type
- * @param val input value
- * @param reduce_op Reduction operation
- * @return Reduction result. All lanes will have the valid result.
- * @note Why not cub? Because cub doesn't seem to allow working with arbitrary
- *       number of warps in a block. All threads in the warp must enter this
- *       function together
- */
-template <typename T, typename ReduceLambda>
-DI T warpReduce(T val, ReduceLambda reduce_op)
-{
-  return logicalWarpReduce<WarpSize>(val, reduce_op);
-}
-
-/**
- * @brief Warp-level sum reduction
- * @tparam T Value type to be reduced
- * @param val input value
- * @return Reduction result. All lanes will have the valid result.
- * @note Why not cub? Because cub doesn't seem to allow working with arbitrary
- *       number of warps in a block. All threads in the warp must enter this
- *       function together
- */
-template <typename T>
-DI T warpReduce(T val)
-{
-  return warpReduce(val, raft::add_op{});
-}
-
-/**
- * @brief 1-D block-level sum reduction
- * @param val input value
- * @param smem shared memory region needed for storing intermediate results. It
- *             must alteast be of size: `sizeof(T) * nWarps`
- * @return only the thread0 will contain valid reduced result
- * @note Why not cub? Because cub doesn't seem to allow working with arbitrary
- *       number of warps in a block. All threads in the block must enter this
- *       function together
- * @todo Expand this to support arbitrary reduction ops
- */
-template <typename T>
-DI T blockReduce(T val, char* smem)
-{
-  auto* sTemp = reinterpret_cast<T*>(smem);
-  int nWarps  = (blockDim.x + WarpSize - 1) / WarpSize;
-  int lid     = laneId();
-  int wid     = threadIdx.x / WarpSize;
-  val         = warpReduce(val);
-  if (lid == 0) sTemp[wid] = val;
-  __syncthreads();
-  val = lid < nWarps ? sTemp[lid] : T(0);
-  return warpReduce(val);
-}
-
 /**
  * @brief Simple utility function to determine whether user_stream or one of the
  * internal streams should be used.
diff --git a/cpp/include/raft/util/device_loads_stores.cuh b/cpp/include/raft/util/device_loads_stores.cuh
index c9bda26b81..e3d54c51f5 100644
--- a/cpp/include/raft/util/device_loads_stores.cuh
+++ b/cpp/include/raft/util/device_loads_stores.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cstdint>                            // uintX_t
+#include <raft/core/device_span.hpp>
 #include <raft/util/cuda_dev_essentials.cuh>  // DI
 
 namespace raft {
@@ -534,6 +535,62 @@ DI void ldg(int8_t (&x)[1], const int8_t* const& addr)
   x[0] = x_int;
 }
 
+/**
+ * @brief Executes a 1D block strided copy
+ * @param dst destination pointer
+ * @param src source pointer
+ * @param size number of items to copy (the loop index is a size_t, so it cannot wrap below size)
+ */
+template <typename T>
+DI void block_copy(T* dst, const T* src, const size_t size)
+{
+  for (size_t i = threadIdx.x; i < size; i += blockDim.x) {
+    dst[i] = src[i];
+  }
+}
+
+/**
+ * @brief Executes a 1D block strided copy
+ * @param dst span of destination pointer
+ * @param src span of source pointer
+ * @param size number of items to copy
+ */
+template <typename T>
+DI void block_copy(raft::device_span<T> dst,
+                   const raft::device_span<const T> src,
+                   const size_t size)
+{
+  assert(src.size() >= size);
+  assert(dst.size() >= size);
+  block_copy(dst.data(), src.data(), size);
+}
+
+/**
+ * @brief Executes a 1D block strided copy
+ * @param dst span of destination pointer
+ * @param src span of source pointer
+ * @param size number of items to copy
+ */
+template <typename T>
+DI void block_copy(raft::device_span<T> dst, const raft::device_span<T> src, const size_t size)
+{
+  assert(src.size() >= size);
+  assert(dst.size() >= size);
+  block_copy(dst.data(), src.data(), size);
+}
+
+/**
+ * @brief Executes a 1D block strided copy
+ * @param dst span of destination pointer
+ * @param src span of source pointer
+ */
+template <typename T>
+DI void block_copy(raft::device_span<T> dst, const raft::device_span<T> src)
+{
+  assert(dst.size() >= src.size());
+  block_copy(dst, src, src.size());
+}
+
 /** @} */
 
 }  // namespace raft
diff --git a/cpp/include/raft/util/pow2_utils.cuh b/cpp/include/raft/util/pow2_utils.cuh
index 3b42682816..68b35837b6 100644
--- a/cpp/include/raft/util/pow2_utils.cuh
+++ b/cpp/include/raft/util/pow2_utils.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -81,6 +81,15 @@ struct Pow2 {
     return x >> I(Log2);
   }
 
+  /**
+   * Rounds up the value to next power of two (the shift is evaluated in type I, not in int).
+   */
+  template <typename I>
+  Pow2_FUNC_QUALIFIER Pow2_WHEN_INTEGRAL(I) round_up_pow2(I val) noexcept
+  {
+    return I(1) << (log2(val) + 1);
+  }
+
   /**
    * x modulo Value operation (remainder of the `div(x)`)
    * (same as `x % Value` in Python).
diff --git a/cpp/include/raft/util/reduction.cuh b/cpp/include/raft/util/reduction.cuh
new file mode 100644
index 0000000000..74c57b4ca2
--- /dev/null
+++ b/cpp/include/raft/util/reduction.cuh
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <raft/core/cudart_utils.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/util/cuda_dev_essentials.cuh>
+#include <raft/util/warp_primitives.cuh>
+
+namespace raft {
+
+/**
+ * @brief Logical-warp-level reduction
+ * @tparam logicalWarpSize Logical warp size (2, 4, 8, 16 or 32)
+ * @tparam T Value type to be reduced
+ * @tparam ReduceLambda Reduction operation type
+ * @param val input value
+ * @param reduce_op Reduction operation
+ * @return Reduction result. All lanes will have the valid result.
+ */
+template <int logicalWarpSize, typename T, typename ReduceLambda>
+DI T logicalWarpReduce(T val, ReduceLambda reduce_op)
+{
+#pragma unroll
+  for (int i = logicalWarpSize / 2; i > 0; i >>= 1) {
+    T tmp = shfl_xor(val, i);
+    val   = reduce_op(val, tmp);
+  }
+  return val;
+}
+
+/**
+ * @brief Warp-level reduction
+ * @tparam T Value type to be reduced
+ * @tparam ReduceLambda Reduction operation type
+ * @param val input value
+ * @param reduce_op Reduction operation
+ * @return Reduction result. All lanes will have the valid result.
+ * @note Why not cub? Because cub doesn't seem to allow working with arbitrary
+ *       number of warps in a block. All threads in the warp must enter this
+ *       function together
+ */
+template <typename T, typename ReduceLambda>
+DI T warpReduce(T val, ReduceLambda reduce_op)
+{
+  return logicalWarpReduce<WarpSize>(val, reduce_op);
+}
+
+/**
+ * @brief Warp-level sum reduction
+ * @tparam T Value type to be reduced
+ * @param val input value
+ * @return Reduction result. All lanes will have the valid result.
+ * @note Why not cub? Because cub doesn't seem to allow working with arbitrary
+ *       number of warps in a block. All threads in the warp must enter this
+ *       function together
+ */
+template <typename T>
+DI T warpReduce(T val)
+{
+  return warpReduce(val, raft::add_op{});
+}
+
+/**
+ * @brief 1-D block-level reduction
+ * @param val input value
+ * @param smem shared memory region needed for storing intermediate results. It
+ *             must at least be of size: `sizeof(T) * nWarps`
+ * @param reduce_op a binary reduction operation; lanes with no matching warp contribute `T(0)`
+ *                  to the final warp reduction, so `T(0)` should be neutral for this operation.
+ * @return only the thread0 will contain valid reduced result
+ * @note Why not cub? Because cub doesn't seem to allow working with arbitrary number of warps
+ *       in a block and uses too many registers. All threads in the block must enter together.
+ */
+template <typename T, typename ReduceLambda = raft::add_op>
+DI T blockReduce(T val, char* smem, ReduceLambda reduce_op = raft::add_op{})
+{
+  auto* sTemp = reinterpret_cast<T*>(smem);
+  int nWarps  = (blockDim.x + WarpSize - 1) / WarpSize;
+  int lid     = laneId();
+  int wid     = threadIdx.x / WarpSize;
+  val         = warpReduce(val, reduce_op);
+  if (lid == 0) sTemp[wid] = val;
+  __syncthreads();
+  val = lid < nWarps ? sTemp[lid] : T(0);
+  return warpReduce(val, reduce_op);
+}
+
+/**
+ * @brief 1-D warp-level ranked reduction which returns the value and rank.
+ * thread 0 will have valid result and rank(idx).
+ * @param[inout] val input value; overwritten with the reduced value
+ * @param[inout] idx index to be used as rank; overwritten with the rank of the reduced value
+ * @param reduce_op a binary reduction operation (defaults to raft::min_op).
+ * @note nothing is returned; only the thread0 will contain valid `val` and `idx` results
+ */
+template <typename T, typename ReduceLambda = raft::min_op, typename i_t = int>
+DI void warpRankedReduce(T& val, i_t& idx, ReduceLambda reduce_op = raft::min_op{})
+{
+#pragma unroll
+  for (i_t offset = WarpSize / 2; offset > 0; offset /= 2) {
+    T tmpVal   = shfl(val, laneId() + offset);
+    i_t tmpIdx = shfl(idx, laneId() + offset);
+    if (reduce_op(tmpVal, val) == tmpVal) {
+      val = tmpVal;
+      idx = tmpIdx;
+    }
+  }
+}
+
+/**
+ * @brief 1-D block-level ranked reduction which returns the value and rank.
+ * thread 0 will have valid result and rank(idx).
+ * @param val input value
+ * @param shbuf shared memory region needed for storing intermediate results. It
+ *             must at least be of size: `(sizeof(T) + sizeof(i_t)) * WarpSize`
+ * @param idx index to be used as rank
+ * @param reduce_op binary min or max operation (defaults to raft::min_op).
+ * @return only the thread0 will contain valid reduced result
+ */
+template <typename T, typename ReduceLambda = raft::min_op, typename i_t = int>
+DI std::pair<T, i_t> blockRankedReduce(T val,
+                                       T* shbuf,
+                                       i_t idx                = threadIdx.x,
+                                       ReduceLambda reduce_op = raft::min_op{})
+{
+  T* values    = shbuf;
+  i_t* indices = (i_t*)&shbuf[WarpSize];
+  i_t wid      = threadIdx.x / WarpSize;
+  i_t nWarps   = (blockDim.x + WarpSize - 1) / WarpSize;
+  warpRankedReduce(val, idx, reduce_op);  // Each warp performs partial reduction
+  i_t lane = laneId();
+  if (lane == 0) {
+    values[wid]  = val;  // Write reduced value to shared memory
+    indices[wid] = idx;  // Write reduced value to shared memory
+  }
+
+  __syncthreads();  // Wait for all partial reductions
+
+  // read from shared memory only if that warp existed
+  if (lane < nWarps) {
+    val = values[lane];
+    idx = indices[lane];
+  } else {
+    // pad with max() for a min op and lowest() for a max op (min() is positive for floats)
+    val = reduce_op(std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max()) ==
+              std::numeric_limits<T>::lowest()
+            ? std::numeric_limits<T>::max()
+            : std::numeric_limits<T>::lowest();
+    idx = -1;
+  }
+  __syncthreads();
+  if (wid == 0) warpRankedReduce(val, idx, reduce_op);
+  return std::pair<T, i_t>{val, idx};
+}
+
+/**
+ * @brief Executes a 1d binary block reduce (sums the per-warp counts of threads whose val is set)
+ * @param val binary value to be reduced across the thread block
+ * @param shmem memory needed for the reduction. It should be at least of size blockDim.x/WarpSize
+ * @return threads with threadIdx.x < WarpSize return the reduced count, all other threads get -1
+ */
+template <int BLOCK_SIZE, typename i_t>
+DI i_t binaryBlockReduce(i_t val, i_t* shmem)
+{
+  static_assert(BLOCK_SIZE <= 1024);
+  assert(val == 0 || val == 1);
+  const uint32_t mask    = __ballot_sync(~0, val);
+  const uint32_t n_items = __popc(mask);
+
+  // Each first thread of the warp
+  if (threadIdx.x % WarpSize == 0) { shmem[threadIdx.x / WarpSize] = n_items; }
+  __syncthreads();
+
+  val = (threadIdx.x < BLOCK_SIZE / WarpSize) ? shmem[threadIdx.x] : 0;
+
+  if (threadIdx.x < WarpSize) {
+    return warpReduce(val);
+  }
+  // Only first warp gets the results
+  else {
+    return -1;
+  }
+}
+
+}  // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/util/warp_primitives.cuh b/cpp/include/raft/util/warp_primitives.cuh
new file mode 100644
index 0000000000..94fddbe0f3
--- /dev/null
+++ b/cpp/include/raft/util/warp_primitives.cuh
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <raft/core/cudart_utils.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/util/cuda_dev_essentials.cuh>
+
+namespace raft {
+
+/** True CUDA alignment of a type (adapted from CUB) */
+template <typename T>
+struct cuda_alignment {
+  struct Pad {  // T plus one extra byte, so sizeof(Pad) grows by exactly one alignment step
+    T val;
+    char byte;
+  };
+
+  static constexpr int bytes = sizeof(Pad) - sizeof(T);  // that growth is the alignment of T
+};
+
+template <typename LargeT, typename UnitT>
+struct is_multiple {  // whether both the size and the alignment of LargeT are multiples of UnitT's
+  static constexpr int large_align_bytes = cuda_alignment<LargeT>::bytes;
+  static constexpr int unit_align_bytes  = cuda_alignment<UnitT>::bytes;
+  static constexpr bool value =
+    (sizeof(LargeT) % sizeof(UnitT) == 0) && (large_align_bytes % unit_align_bytes == 0);
+};
+
+template <typename LargeT, typename UnitT>
+inline constexpr bool is_multiple_v = is_multiple<LargeT, UnitT>::value;  // shorthand for ::value
+
+/** apply a warp-wide fence (useful from Volta+ archs); compiles to nothing below sm_70 */
+DI void warpFence()
+{
+#if __CUDA_ARCH__ >= 700
+  __syncwarp();
+#endif
+}
+
+/** warp-wide any boolean aggregator; mask is ignored when CUDART_VERSION < 9000 */
+DI bool any(bool inFlag, uint32_t mask = 0xffffffffu)
+{
+#if CUDART_VERSION >= 9000
+  inFlag = __any_sync(mask, inFlag);
+#else
+  inFlag = __any(inFlag);  // pre-CUDA-9 intrinsic, takes no mask
+#endif
+  return inFlag;
+}
+
+/** warp-wide all boolean aggregator; mask is ignored when CUDART_VERSION < 9000 */
+DI bool all(bool inFlag, uint32_t mask = 0xffffffffu)
+{
+#if CUDART_VERSION >= 9000
+  inFlag = __all_sync(mask, inFlag);
+#else
+  inFlag = __all(inFlag);  // pre-CUDA-9 intrinsic, takes no mask
+#endif
+  return inFlag;
+}
+
+/** For every thread in the warp, set the corresponding bit to the thread's flag value.  */
+DI uint32_t ballot(bool inFlag, uint32_t mask = 0xffffffffu)
+{
+#if CUDART_VERSION >= 9000
+  return __ballot_sync(mask, inFlag);
+#else
+  return __ballot(inFlag);  // pre-CUDA-9 intrinsic, takes no mask
+#endif
+}
+
+template <typename T>
+struct is_shuffleable {  // types that the shfl* overloads below pass straight to the intrinsics
+  static constexpr bool value =
+    std::is_same_v<T, int> || std::is_same_v<T, unsigned int> || std::is_same_v<T, long> ||
+    std::is_same_v<T, unsigned long> || std::is_same_v<T, long long> ||
+    std::is_same_v<T, unsigned long long> || std::is_same_v<T, float> || std::is_same_v<T, double>;
+};
+
+template <typename T>
+inline constexpr bool is_shuffleable_v = is_shuffleable<T>::value;  // shorthand for ::value
+
+/**
+ * @brief Shuffle the data inside a warp (overload for types the CUDA intrinsic accepts directly)
+ * @tparam T the data type
+ * @param val value to be shuffled
+ * @param srcLane lane from where to shuffle
+ * @param width lane width
+ * @param mask mask of participating threads (Volta+); not used when CUDART_VERSION < 9000
+ * @return the shuffled data
+ */
+template <typename T>
+DI std::enable_if_t<is_shuffleable_v<T>, T> shfl(T val,
+                                                 int srcLane,
+                                                 int width     = WarpSize,
+                                                 uint32_t mask = 0xffffffffu)
+{
+#if CUDART_VERSION >= 9000
+  return __shfl_sync(mask, val, srcLane, width);
+#else
+  return __shfl(val, srcLane, width);
+#endif
+}
+
+/// Overload of shfl for data types not supported by the CUDA intrinsics (shuffled word by word)
+template <typename T>
+DI std::enable_if_t<!is_shuffleable_v<T>, T> shfl(T val,
+                                                  int srcLane,
+                                                  int width     = WarpSize,
+                                                  uint32_t mask = 0xffffffffu)
+{
+  using UnitT =  // widest of unsigned int / short / char for which is_multiple_v<T, ...> holds
+    std::conditional_t<is_multiple_v<T, int>,
+                       unsigned int,
+                       std::conditional_t<is_multiple_v<T, short>, unsigned short, unsigned char>>;
+
+  constexpr int n_words = sizeof(T) / sizeof(UnitT);  // number of UnitT pieces that make up T
+
+  T output;
+  UnitT* output_alias = reinterpret_cast<UnitT*>(&output);
+  UnitT* input_alias  = reinterpret_cast<UnitT*>(&val);
+
+  unsigned int shuffle_word;  // each piece is widened to unsigned int for the native overload
+  shuffle_word    = shfl((unsigned int)input_alias[0], srcLane, width, mask);
+  output_alias[0] = shuffle_word;
+
+#pragma unroll
+  for (int i = 1; i < n_words; ++i) {  // remaining pieces; piece 0 was handled above
+    shuffle_word    = shfl((unsigned int)input_alias[i], srcLane, width, mask);
+    output_alias[i] = shuffle_word;
+  }
+
+  return output;
+}
+
+/**
+ * @brief Shuffle the data inside a warp from lower lane IDs (overload for natively handled types)
+ * @tparam T the data type
+ * @param val value to be shuffled
+ * @param delta lower lane ID delta from where to shuffle
+ * @param width lane width
+ * @param mask mask of participating threads (Volta+); not used when CUDART_VERSION < 9000
+ * @return the shuffled data
+ */
+template <typename T>
+DI std::enable_if_t<is_shuffleable_v<T>, T> shfl_up(T val,
+                                                    int delta,
+                                                    int width     = WarpSize,
+                                                    uint32_t mask = 0xffffffffu)
+{
+#if CUDART_VERSION >= 9000
+  return __shfl_up_sync(mask, val, delta, width);
+#else
+  return __shfl_up(val, delta, width);
+#endif
+}
+
+/// Overload of shfl_up for data types not supported by the CUDA intrinsics (shuffled word by word)
+template <typename T>
+DI std::enable_if_t<!is_shuffleable_v<T>, T> shfl_up(T val,
+                                                     int delta,
+                                                     int width     = WarpSize,
+                                                     uint32_t mask = 0xffffffffu)
+{
+  using UnitT =  // widest of unsigned int / short / char for which is_multiple_v<T, ...> holds
+    std::conditional_t<is_multiple_v<T, int>,
+                       unsigned int,
+                       std::conditional_t<is_multiple_v<T, short>, unsigned short, unsigned char>>;
+
+  constexpr int n_words = sizeof(T) / sizeof(UnitT);  // number of UnitT pieces that make up T
+
+  T output;
+  UnitT* output_alias = reinterpret_cast<UnitT*>(&output);
+  UnitT* input_alias  = reinterpret_cast<UnitT*>(&val);
+
+  unsigned int shuffle_word;  // each piece is widened to unsigned int for the native overload
+  shuffle_word    = shfl_up((unsigned int)input_alias[0], delta, width, mask);
+  output_alias[0] = shuffle_word;
+
+#pragma unroll
+  for (int i = 1; i < n_words; ++i) {  // remaining pieces; piece 0 was handled above
+    shuffle_word    = shfl_up((unsigned int)input_alias[i], delta, width, mask);
+    output_alias[i] = shuffle_word;
+  }
+
+  return output;
+}
+
+/**
+ * @brief Shuffle the data inside a warp via xor shuffle (overload for natively handled types)
+ * @tparam T the data type
+ * @param val value to be shuffled
+ * @param laneMask mask to be applied in order to perform xor shuffle
+ * @param width lane width
+ * @param mask mask of participating threads (Volta+); not used when CUDART_VERSION < 9000
+ * @return the shuffled data
+ */
+template <typename T>
+DI std::enable_if_t<is_shuffleable_v<T>, T> shfl_xor(T val,
+                                                     int laneMask,
+                                                     int width     = WarpSize,
+                                                     uint32_t mask = 0xffffffffu)
+{
+#if CUDART_VERSION >= 9000
+  return __shfl_xor_sync(mask, val, laneMask, width);
+#else
+  return __shfl_xor(val, laneMask, width);
+#endif
+}
+
+/// Overload of shfl_xor for data types not supported by the CUDA intrinsics (done word by word)
+template <typename T>
+DI std::enable_if_t<!is_shuffleable_v<T>, T> shfl_xor(T val,
+                                                      int laneMask,
+                                                      int width     = WarpSize,
+                                                      uint32_t mask = 0xffffffffu)
+{
+  using UnitT =  // widest of unsigned int / short / char for which is_multiple_v<T, ...> holds
+    std::conditional_t<is_multiple_v<T, int>,
+                       unsigned int,
+                       std::conditional_t<is_multiple_v<T, short>, unsigned short, unsigned char>>;
+
+  constexpr int n_words = sizeof(T) / sizeof(UnitT);  // number of UnitT pieces that make up T
+
+  T output;
+  UnitT* output_alias = reinterpret_cast<UnitT*>(&output);
+  UnitT* input_alias  = reinterpret_cast<UnitT*>(&val);
+
+  unsigned int shuffle_word;  // each piece is widened to unsigned int for the native overload
+  shuffle_word    = shfl_xor((unsigned int)input_alias[0], laneMask, width, mask);
+  output_alias[0] = shuffle_word;
+
+#pragma unroll
+  for (int i = 1; i < n_words; ++i) {  // remaining pieces; piece 0 was handled above
+    shuffle_word    = shfl_xor((unsigned int)input_alias[i], laneMask, width, mask);
+    output_alias[i] = shuffle_word;
+  }
+
+  return output;
+}
+
+}  // namespace raft
\ No newline at end of file
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 88ad7772c2..98ce8ac5bd 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -372,7 +372,15 @@ if(BUILD_TESTS)
   )
 
   ConfigureTest(
-    NAME UTILS_TEST PATH test/core/seive.cu test/util/bitonic_sort.cu test/util/cudart_utils.cpp
-    test/util/device_atomics.cu test/util/integer_utils.cpp test/util/pow2_utils.cu
+    NAME
+    UTILS_TEST
+    PATH
+    test/core/seive.cu
+    test/util/bitonic_sort.cu
+    test/util/cudart_utils.cpp
+    test/util/device_atomics.cu
+    test/util/integer_utils.cpp
+    test/util/pow2_utils.cu
+    test/util/reduction.cu
   )
 endif()
diff --git a/cpp/test/util/reduction.cu b/cpp/test/util/reduction.cu
new file mode 100644
index 0000000000..17deaf99eb
--- /dev/null
+++ b/cpp/test/util/reduction.cu
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.cuh"
+
+#include <raft/random/device/sample.cuh>
+#include <raft/random/rng.cuh>
+#include <raft/util/reduction.cuh>
+
+#include <rmm/device_scalar.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <numeric>
+
+namespace raft::util {
+
+constexpr int max_warps_per_block = 32;
+
+template <typename ReduceLambda>
+__global__ void test_reduction_kernel(const int* input, int* reduction_res, ReduceLambda reduce_op)
+{
+  assert(gridDim.x == 1);  // single-block test: each thread reads input[threadIdx.x]
+  __shared__ int red_buf[max_warps_per_block];  // scratch space handed to blockReduce
+  int th_val = input[threadIdx.x];
+  th_val     = raft::blockReduce(th_val, (char*)red_buf, reduce_op);
+  if (threadIdx.x == 0) { reduction_res[0] = th_val; }  // only thread 0 publishes the result
+}
+
+template <typename ReduceLambda>
+__global__ void test_ranked_reduction_kernel(const int* input,
+                                             int* reduction_res,
+                                             int* out_rank,
+                                             ReduceLambda reduce_op)
+{
+  assert(gridDim.x == 1);  // single-block test: each thread reads input[threadIdx.x]
+  __shared__ int red_buf[2 * max_warps_per_block];  // scratch space for blockRankedReduce
+  int th_val  = input[threadIdx.x];
+  int th_rank = threadIdx.x;  // the rank reported back is the thread index of the winner
+  auto result = raft::blockRankedReduce(th_val, red_buf, th_rank, reduce_op);
+  if (threadIdx.x == 0) {  // only thread 0 publishes the (value, rank) pair
+    reduction_res[0] = result.first;
+    out_rank[0]      = result.second;
+  }
+}
+
+__global__ void test_block_random_sample_kernel(const int* input, int* reduction_res)
+{
+  assert(gridDim.x == 1);  // single-block test: each thread reads input[threadIdx.x]
+  __shared__ int red_buf[2 * max_warps_per_block];  // scratch space for block_random_sample
+  raft::random::PCGenerator thread_rng(1234, threadIdx.x, 0);  // fixed seed 1234 for every thread
+  int th_val  = input[threadIdx.x];
+  int th_rank = threadIdx.x;
+  int result  = raft::random::device::block_random_sample(thread_rng, red_buf, th_val, th_rank);
+  if (threadIdx.x == 0) { reduction_res[0] = result; }  // only thread 0 publishes the result
+}
+
+template <int TPB>
+__global__ void test_binary_reduction_kernel(const int* input, int* reduction_res)
+{
+  assert(gridDim.x == 1);  // single-block test: each thread reads input[threadIdx.x]
+  __shared__ int shared[TPB / WarpSize];  // one slot per warp, as binaryBlockReduce documents
+  int th_val = input[threadIdx.x];
+  int result = raft::binaryBlockReduce<TPB>(th_val, shared);
+  if (threadIdx.x == 0) { reduction_res[0] = result; }  // only thread 0 publishes the result
+}
+
+struct reduction_launch {  // host-side launchers: run one kernel, then compare with ref values
+  template <typename ReduceLambda>
+  static void run(const rmm::device_uvector<int>& arr_d,
+                  int ref_val,
+                  ReduceLambda reduce_op,
+                  rmm::cuda_stream_view stream)
+  {
+    rmm::device_scalar<int> ref_d(stream);
+    const int block_dim = 64;  // one thread per element of the 64-element test vectors
+    const int grid_dim  = 1;   // the kernels assert gridDim.x == 1
+    test_reduction_kernel<<<grid_dim, block_dim, 0, stream>>>(
+      arr_d.data(), ref_d.data(), reduce_op);
+    stream.synchronize();
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
+    ASSERT_EQ(ref_d.value(stream), ref_val);
+  }
+
+  template <typename ReduceLambda>
+  static void run_ranked(const rmm::device_uvector<int>& arr_d,
+                         int ref_val,
+                         int rank_ref_val,
+                         ReduceLambda reduce_op,
+                         rmm::cuda_stream_view stream)
+  {
+    rmm::device_scalar<int> ref_d(stream);
+    rmm::device_scalar<int> rank_d(stream);
+    const int block_dim = 64;  // one thread per element of the 64-element test vectors
+    const int grid_dim  = 1;   // the kernels assert gridDim.x == 1
+    test_ranked_reduction_kernel<<<grid_dim, block_dim, 0, stream>>>(
+      arr_d.data(), ref_d.data(), rank_d.data(), reduce_op);
+    stream.synchronize();
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
+    ASSERT_EQ(ref_d.value(stream), ref_val);        // reduced value
+    ASSERT_EQ(rank_d.value(stream), rank_ref_val);  // thread index that held it
+  }
+
+  static void run_random_sample(const rmm::device_uvector<int>& arr_d,
+                                int ref_val,
+                                rmm::cuda_stream_view stream)
+  {
+    rmm::device_scalar<int> ref_d(stream);
+    const int block_dim = 64;  // one thread per element of the 64-element test vectors
+    const int grid_dim  = 1;   // the kernels assert gridDim.x == 1
+    test_block_random_sample_kernel<<<grid_dim, block_dim, 0, stream>>>(arr_d.data(), ref_d.data());
+    stream.synchronize();
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
+    ASSERT_EQ(ref_d.value(stream), ref_val);
+  }
+
+  static void run_binary(const rmm::device_uvector<int>& arr_d,
+                         int ref_val,
+                         rmm::cuda_stream_view stream)
+  {
+    rmm::device_scalar<int> ref_d(stream);
+    constexpr int block_dim = 64;  // constexpr: also used as the kernel's TPB template argument
+    const int grid_dim      = 1;   // the kernels assert gridDim.x == 1
+    test_binary_reduction_kernel<block_dim>
+      <<<grid_dim, block_dim, 0, stream>>>(arr_d.data(), ref_d.data());
+    stream.synchronize();
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
+    ASSERT_EQ(ref_d.value(stream), ref_val);
+  }
+};
+
+template <typename T>
+class ReductionTest : public testing::TestWithParam<std::vector<int>> {  // NOLINT
+ protected:
+  const std::vector<int> input;                                          // NOLINT
+  rmm::cuda_stream_view stream;                                          // NOLINT
+  rmm::device_uvector<int> arr_d;                                        // NOLINT
+
+ public:
+  explicit ReductionTest()
+    : input(testing::TestWithParam<std::vector<int>>::GetParam()),
+      stream(rmm::cuda_stream_default),
+      arr_d(input.size(), stream)
+  {
+    update_device(arr_d.data(), input.data(), input.size(), stream);  // upload the test vector
+  }
+
+  void run_reduction()
+  {
+    // each launch compares the device result with a reference hard-coded for test_vector
+    reduction_launch::run(arr_d, 0, raft::min_op{}, stream);
+    reduction_launch::run(arr_d, 5, raft::max_op{}, stream);
+    reduction_launch::run(arr_d, 158, raft::add_op{}, stream);
+    reduction_launch::run_ranked(arr_d, 5, 15, raft::max_op{}, stream);  // max 5 sits at index 15
+    reduction_launch::run_ranked(arr_d, 0, 26, raft::min_op{}, stream);  // min 0 sits at index 26
+    // value 15 is for the current state of PCgenerator. adjust this if rng changes
+    reduction_launch::run_random_sample(arr_d, 15, stream);
+  }
+
+  void run_binary_reduction() { reduction_launch::run_binary(arr_d, 24, stream); }
+};
+
+const std::vector<int> test_vector{1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 5, 1, 2, 3, 4, 1, 2,
+                                   3, 4, 1, 2, 0, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
+                                   1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4};
+const std::vector<int> binary_test_vector{
+  1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
+  1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0};
+auto reduction_input        = ::testing::Values(test_vector);
+auto binary_reduction_input = ::testing::Values(binary_test_vector);
+
+using ReductionTestInt       = ReductionTest<int>;                            // NOLINT
+using BinaryReductionTestInt = ReductionTest<int>;                            // NOLINT
+TEST_P(ReductionTestInt, REDUCTIONS) { run_reduction(); }
+INSTANTIATE_TEST_CASE_P(ReductionTest, ReductionTestInt, reduction_input);    // NOLINT
+TEST_P(BinaryReductionTestInt, BINARY_REDUCTION) { run_binary_reduction(); }  // NOLINT
+INSTANTIATE_TEST_CASE_P(BinaryReductionTest,
+                        BinaryReductionTestInt,
+                        binary_reduction_input);  // NOLINT
+
+}  // namespace raft::util

From 650699ba9fcab08eff365cd42ebf77ad5a280060 Mon Sep 17 00:00:00 2001
From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com>
Date: Thu, 18 May 2023 13:44:18 -0500
Subject: [PATCH 63/78] run docs nightly too (#1520)

This PR configures `raft` docs builds to also run nightly (not only on PR merges).

Authors:
  - Jake Awe (https://github.com/AyodeAwe)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/raft/pull/1520
---
 .github/workflows/build.yaml | 9 ++++++---
 ci/build_docs.sh             | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 0f5f84c158..bed83fca98 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -54,16 +54,19 @@ jobs:
       sha: ${{ inputs.sha }}
       skip_upload_pkgs: libraft-template
   docs-build:
-    if: github.ref_type == 'branch' && github.event_name == 'push'
+    if: github.ref_type == 'branch'
     needs: python-build
     secrets: inherit
     uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06
     with:
-      build_type: branch
-      node_type: "gpu-v100-latest-1"
       arch: "amd64"
+      branch: ${{ inputs.branch }}
+      build_type: ${{ inputs.build_type || 'branch' }}
       container_image: "rapidsai/ci:latest"
+      date: ${{ inputs.date }}
+      node_type: "gpu-v100-latest-1"
       run_script: "ci/build_docs.sh"
+      sha: ${{ inputs.sha }}
   wheel-build-pylibraft:
     secrets: inherit
     uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index e52beb22ea..b1cb993798 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -42,7 +42,7 @@ sphinx-build -b text source _text
 popd
 
 
-if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then
+if [[ ${RAPIDS_BUILD_TYPE} != "pull-request" ]]; then
   rapids-logger "Upload Docs to S3"
   aws s3 sync --no-progress --delete docs/_html "s3://rapidsai-docs/raft/${VERSION_NUMBER}/html"
   aws s3 sync --no-progress --delete docs/_text "s3://rapidsai-docs/raft/${VERSION_NUMBER}/txt"

From 29d1c15bf87bf17b92bf7b8b76eb6f73ca1b76f3 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Thu, 18 May 2023 13:19:04 -0700
Subject: [PATCH 64/78] Fix failing TiledKNNTest unittest (#1533)

The TiledKNNTest test was failing, apparently because the matrix::select_k code isn't guaranteed to return elements in sorted order. The test expected the outputs to be sorted and failed when they were not. This change fixes the test to sort the outputs before comparing.

Closes https://github.com/rapidsai/raft/issues/1526

Authors:
  - Ben Frederickson (https://github.com/benfred)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1533
---
 cpp/test/neighbors/knn_utils.cuh | 21 ++++++++++++++++++---
 cpp/test/neighbors/tiled_knn.cu  |  3 ++-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/cpp/test/neighbors/knn_utils.cuh b/cpp/test/neighbors/knn_utils.cuh
index ac34699ac5..b79e7cffe8 100644
--- a/cpp/test/neighbors/knn_utils.cuh
+++ b/cpp/test/neighbors/knn_utils.cuh
@@ -46,7 +46,8 @@ testing::AssertionResult devArrMatchKnnPair(const T* expected_idx,
                                             size_t rows,
                                             size_t cols,
                                             const DistT eps,
-                                            cudaStream_t stream = 0)
+                                            cudaStream_t stream = 0,
+                                            bool sort_inputs    = false)
 {
   size_t size = rows * cols;
   std::unique_ptr<T[]> exp_idx_h(new T[size]);
@@ -57,16 +58,30 @@ testing::AssertionResult devArrMatchKnnPair(const T* expected_idx,
   raft::update_host<T>(act_idx_h.get(), actual_idx, size, stream);
   raft::update_host<DistT>(exp_dist_h.get(), expected_dist, size, stream);
   raft::update_host<DistT>(act_dist_h.get(), actual_dist, size, stream);
+
   RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
   for (size_t i(0); i < rows; ++i) {
+    std::vector<std::pair<DistT, T>> actual;
+    std::vector<std::pair<DistT, T>> expected;
     for (size_t j(0); j < cols; ++j) {
       auto idx      = i * cols + j;  // row major assumption!
       auto exp_idx  = exp_idx_h.get()[idx];
       auto act_idx  = act_idx_h.get()[idx];
       auto exp_dist = exp_dist_h.get()[idx];
       auto act_dist = act_dist_h.get()[idx];
-      idx_dist_pair exp_kvp(exp_idx, exp_dist, raft::CompareApprox<DistT>(eps));
-      idx_dist_pair act_kvp(act_idx, act_dist, raft::CompareApprox<DistT>(eps));
+      actual.push_back(std::make_pair(act_dist, act_idx));
+      expected.push_back(std::make_pair(exp_dist, exp_idx));
+    }
+    if (sort_inputs) {
+      // inputs could be unsorted here, sort for comparison
+      std::sort(actual.begin(), actual.end());
+      std::sort(expected.begin(), expected.end());
+    }
+    for (size_t j(0); j < cols; ++j) {
+      auto act = actual[j];
+      auto exp = expected[j];
+      idx_dist_pair exp_kvp(exp.second, exp.first, raft::CompareApprox<DistT>(eps));
+      idx_dist_pair act_kvp(act.second, act.first, raft::CompareApprox<DistT>(eps));
       if (!(exp_kvp == act_kvp)) {
         return testing::AssertionFailure()
                << "actual=" << act_kvp.idx << "," << act_kvp.dist << "!="
diff --git a/cpp/test/neighbors/tiled_knn.cu b/cpp/test/neighbors/tiled_knn.cu
index e7c41cbd93..2ab82b845e 100644
--- a/cpp/test/neighbors/tiled_knn.cu
+++ b/cpp/test/neighbors/tiled_knn.cu
@@ -178,7 +178,8 @@ class TiledKNNTest : public ::testing::TestWithParam<TiledKNNInputs> {
                                                        num_queries,
                                                        k_,
                                                        float(0.001),
-                                                       stream_));
+                                                       stream_,
+                                                       true));
   }
 
   void SetUp() override

From dfb3d2cef2907290e8910f19a8b1a2cfb766feed Mon Sep 17 00:00:00 2001
From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com>
Date: Fri, 19 May 2023 10:49:29 +0200
Subject: [PATCH 65/78] ivf-flat: fix incorrect recomputed size of the index
 (#1525)

Fix ivf-flat's `recompute_internal_state` incorrectly using the amortized list sizes to compute the index size.

Authors:
  - Artem M. Chirkin (https://github.com/achirkin)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1525
---
 cpp/include/raft/neighbors/ivf_flat_types.hpp | 27 +++++++++++--------
 cpp/test/neighbors/ann_ivf_flat.cuh           |  1 +
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/cpp/include/raft/neighbors/ivf_flat_types.hpp b/cpp/include/raft/neighbors/ivf_flat_types.hpp
index ccdc3f28da..2e2e49cdbc 100644
--- a/cpp/include/raft/neighbors/ivf_flat_types.hpp
+++ b/cpp/include/raft/neighbors/ivf_flat_types.hpp
@@ -23,15 +23,18 @@
 #include <raft/core/error.hpp>
 #include <raft/core/host_mdarray.hpp>
 #include <raft/core/mdspan_types.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/core/resource/thrust_policy.hpp>
 #include <raft/core/resources.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/ivf_list_types.hpp>
 #include <raft/util/integer_utils.hpp>
 
+#include <thrust/reduce.h>
+
 #include <algorithm>  // std::max
 #include <memory>
 #include <optional>
-#include <thrust/fill.h>
 #include <type_traits>
 
 namespace raft::neighbors::ivf_flat {
@@ -303,20 +306,22 @@ struct index : ann::index {
     auto stream = resource::get_cuda_stream(res);
 
     // Actualize the list pointers
-    auto this_lists           = lists();
-    auto this_data_ptrs       = data_ptrs();
-    auto this_inds_ptrs       = inds_ptrs();
-    IdxT recompute_total_size = 0;
+    auto this_lists     = lists();
+    auto this_data_ptrs = data_ptrs();
+    auto this_inds_ptrs = inds_ptrs();
     for (uint32_t label = 0; label < this_lists.size(); label++) {
-      auto& list           = this_lists[label];
-      const auto data_ptr  = list ? list->data.data_handle() : nullptr;
-      const auto inds_ptr  = list ? list->indices.data_handle() : nullptr;
-      const auto list_size = list ? IdxT(list->size) : 0;
+      auto& list          = this_lists[label];
+      const auto data_ptr = list ? list->data.data_handle() : nullptr;
+      const auto inds_ptr = list ? list->indices.data_handle() : nullptr;
       copy(&this_data_ptrs(label), &data_ptr, 1, stream);
       copy(&this_inds_ptrs(label), &inds_ptr, 1, stream);
-      recompute_total_size += list_size;
     }
-    total_size_ = recompute_total_size;
+    auto this_list_sizes = list_sizes().data_handle();
+    total_size_          = thrust::reduce(resource::get_thrust_policy(res),
+                                 this_list_sizes,
+                                 this_list_sizes + this_lists.size(),
+                                 IdxT(0),  // init type is the accumulator type; int could overflow
+                                 raft::add_op{});
     check_consistency();
   }
 
diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh
index 1c9406e8a9..88bf53280b 100644
--- a/cpp/test/neighbors/ann_ivf_flat.cuh
+++ b/cpp/test/neighbors/ann_ivf_flat.cuh
@@ -201,6 +201,7 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
         ivf_flat::detail::serialize(handle_, "ivf_flat_index", index_2);
 
         auto index_loaded = ivf_flat::detail::deserialize<DataT, IdxT>(handle_, "ivf_flat_index");
+        ASSERT_EQ(index_2.size(), index_loaded.size());
 
         ivf_flat::search(handle_,
                          search_params,

From af7e06760fe0ed8ca6c7c59dec5269559394da6d Mon Sep 17 00:00:00 2001
From: Tamas Bela Feher <tfeher@nvidia.com>
Date: Fri, 19 May 2023 14:17:08 +0200
Subject: [PATCH 66/78] Python API for IVF-Flat serialization (#1516)

This PR adds Python API for IVF-Flat serialization.

closes #752

Authors:
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1516
---
 cpp/CMakeLists.txt                            |   1 +
 .../neighbors/detail/ivf_flat_serialize.cuh   |  10 +-
 .../raft_runtime/neighbors/ivf_flat.hpp       |  17 +-
 .../neighbors/ivf_flat_serialize.cu           |  65 ++++++++
 .../pylibraft/neighbors/ivf_flat/__init__.py  |  13 +-
 .../neighbors/ivf_flat/cpp/c_ivf_flat.pxd     |  48 ++++++
 .../pylibraft/neighbors/ivf_flat/ivf_flat.pyx | 150 ++++++++++++++++++
 .../pylibraft/pylibraft/test/test_ivf_flat.py |  47 ++++++
 8 files changed, 348 insertions(+), 3 deletions(-)
 create mode 100644 cpp/src/raft_runtime/neighbors/ivf_flat_serialize.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 68ff4f3bb6..eb35554768 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -365,6 +365,7 @@ if(RAFT_COMPILE_LIBRARY)
     src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
     src/raft_runtime/neighbors/ivf_flat_build.cu
     src/raft_runtime/neighbors/ivf_flat_search.cu
+    src/raft_runtime/neighbors/ivf_flat_serialize.cu
     src/raft_runtime/neighbors/ivfpq_build.cu
     src/raft_runtime/neighbors/ivfpq_deserialize.cu
     src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
index af2e6ba0f8..b00d308827 100644
--- a/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_flat_serialize.cuh
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <raft/core/detail/mdspan_numpy_serializer.hpp>
 #include <raft/core/mdarray.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/serialize.hpp>
@@ -33,7 +34,7 @@ namespace raft::neighbors::ivf_flat::detail {
 // backward compatibility.
 // TODO(hcho3) Implement next-gen serializer for IVF that allows for expansion in a backward
 //             compatible fashion.
-constexpr int serialization_version = 3;
+constexpr int serialization_version = 4;
 
 // NB: we wrap this check in a struct, so that the updated RealSize is easy to see in the error
 // message.
@@ -62,6 +63,10 @@ void serialize(raft::resources const& handle, std::ostream& os, const index<T, I
   RAFT_LOG_DEBUG(
     "Saving IVF-Flat index, size %zu, dim %u", static_cast<size_t>(index_.size()), index_.dim());
 
+  std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype<T>().to_string();
+  dtype_string.resize(4);
+  os << dtype_string;
+
   serialize_scalar(handle, os, serialization_version);
   serialize_scalar(handle, os, index_.size());
   serialize_scalar(handle, os, index_.dim());
@@ -123,6 +128,9 @@ void serialize(raft::resources const& handle,
 template <typename T, typename IdxT>
 auto deserialize(raft::resources const& handle, std::istream& is) -> index<T, IdxT>
 {
+  char dtype_string[4];
+  is.read(dtype_string, 4);
+
   auto ver = deserialize_scalar<int>(handle, is);
   if (ver != serialization_version) {
     RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver);
diff --git a/cpp/include/raft_runtime/neighbors/ivf_flat.hpp b/cpp/include/raft_runtime/neighbors/ivf_flat.hpp
index 37a9d39ae3..5b8918ec7f 100644
--- a/cpp/include/raft_runtime/neighbors/ivf_flat.hpp
+++ b/cpp/include/raft_runtime/neighbors/ivf_flat.hpp
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/neighbors/ivf_flat_types.hpp>
+#include <string>
 
 namespace raft::runtime::neighbors::ivf_flat {
 
@@ -43,7 +44,21 @@ namespace raft::runtime::neighbors::ivf_flat {
   void extend(raft::resources const& handle,                                         \
               raft::device_matrix_view<const T, IdxT, row_major> new_vectors,        \
               std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, \
-              raft::neighbors::ivf_flat::index<T, IdxT>* idx);
+              raft::neighbors::ivf_flat::index<T, IdxT>* idx);                       \
+                                                                                     \
+  void serialize_file(raft::resources const& handle,                                 \
+                      const std::string& filename,                                   \
+                      const raft::neighbors::ivf_flat::index<T, IdxT>& index);       \
+                                                                                     \
+  void deserialize_file(raft::resources const& handle,                               \
+                        const std::string& filename,                                 \
+                        raft::neighbors::ivf_flat::index<T, IdxT>* index);           \
+  void serialize(raft::resources const& handle,                                      \
+                 std::string& str,                                                   \
+                 const raft::neighbors::ivf_flat::index<T, IdxT>& index);            \
+  void deserialize(raft::resources const& handle,                                    \
+                   const std::string& str,                                           \
+                   raft::neighbors::ivf_flat::index<T, IdxT>*);
 
 RAFT_INST_BUILD_EXTEND(float, int64_t)
 RAFT_INST_BUILD_EXTEND(int8_t, int64_t)
diff --git a/cpp/src/raft_runtime/neighbors/ivf_flat_serialize.cu b/cpp/src/raft_runtime/neighbors/ivf_flat_serialize.cu
new file mode 100644
index 0000000000..049b8b00da
--- /dev/null
+++ b/cpp/src/raft_runtime/neighbors/ivf_flat_serialize.cu
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sstream>
+#include <string>
+
+#include <raft/core/device_resources.hpp>
+#include <raft/neighbors/ivf_flat_serialize.cuh>
+#include <raft/neighbors/ivf_flat_types.hpp>
+#include <raft_runtime/neighbors/ivf_flat.hpp>
+
+namespace raft::runtime::neighbors::ivf_flat {
+
+#define RAFT_IVF_FLAT_SERIALIZE_INST(DTYPE)                                            \
+  void serialize_file(raft::resources const& handle,                                   \
+                      const std::string& filename,                                     \
+                      const raft::neighbors::ivf_flat::index<DTYPE, int64_t>& index)   \
+  {                                                                                    \
+    raft::neighbors::ivf_flat::serialize(handle, filename, index);                     \
+  };                                                                                   \
+                                                                                       \
+  void deserialize_file(raft::resources const& handle,                                 \
+                        const std::string& filename,                                   \
+                        raft::neighbors::ivf_flat::index<DTYPE, int64_t>* index)       \
+  {                                                                                    \
+    if (!index) { RAFT_FAIL("Invalid index pointer"); }                                \
+    *index = raft::neighbors::ivf_flat::deserialize<DTYPE, int64_t>(handle, filename); \
+  };                                                                                   \
+  void serialize(raft::resources const& handle,                                        \
+                 std::string& str,                                                     \
+                 const raft::neighbors::ivf_flat::index<DTYPE, int64_t>& index)        \
+  {                                                                                    \
+    std::stringstream os;                                                              \
+    raft::neighbors::ivf_flat::serialize(handle, os, index);                           \
+    str = os.str();                                                                    \
+  }                                                                                    \
+                                                                                       \
+  void deserialize(raft::resources const& handle,                                      \
+                   const std::string& str,                                             \
+                   raft::neighbors::ivf_flat::index<DTYPE, int64_t>* index)            \
+  {                                                                                    \
+    std::istringstream is(str);                                                        \
+    if (!index) { RAFT_FAIL("Invalid index pointer"); }                                \
+    *index = raft::neighbors::ivf_flat::deserialize<DTYPE, int64_t>(handle, is);       \
+  }
+
+RAFT_IVF_FLAT_SERIALIZE_INST(float);
+RAFT_IVF_FLAT_SERIALIZE_INST(int8_t);
+RAFT_IVF_FLAT_SERIALIZE_INST(uint8_t);
+
+#undef RAFT_IVF_FLAT_SERIALIZE_INST
+}  // namespace raft::runtime::neighbors::ivf_flat
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/__init__.py b/python/pylibraft/pylibraft/neighbors/ivf_flat/__init__.py
index 58fd88b873..057cb98f17 100644
--- a/python/pylibraft/pylibraft/neighbors/ivf_flat/__init__.py
+++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/__init__.py
@@ -13,7 +13,16 @@
 # limitations under the License.
 #
 
-from .ivf_flat import Index, IndexParams, SearchParams, build, extend, search
+from .ivf_flat import (
+    Index,
+    IndexParams,
+    SearchParams,
+    build,
+    extend,
+    load,
+    save,
+    search,
+)
 
 __all__ = [
     "Index",
@@ -22,4 +31,6 @@
     "build",
     "extend",
     "search",
+    "save",
+    "load",
 ]
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd b/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd
index 31a251e7c2..a281d33310 100644
--- a/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd
+++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd
@@ -133,3 +133,51 @@ cdef extern from "raft_runtime/neighbors/ivf_flat.hpp" \
         device_matrix_view[uint8_t, int64_t, row_major] queries,
         device_matrix_view[int64_t, int64_t, row_major] neighbors,
         device_matrix_view[float, int64_t, row_major] distances) except +
+
+    cdef void serialize(const device_resources& handle,
+                        string& str,
+                        const index[float, int64_t]& index) except +
+
+    cdef void deserialize(const device_resources& handle,
+                          const string& str,
+                          index[float, int64_t]* index) except +
+
+    cdef void serialize(const device_resources& handle,
+                        string& str,
+                        const index[uint8_t, int64_t]& index) except +
+
+    cdef void deserialize(const device_resources& handle,
+                          const string& str,
+                          index[uint8_t, int64_t]* index) except +
+
+    cdef void serialize(const device_resources& handle,
+                        string& str,
+                        const index[int8_t, int64_t]& index) except +
+
+    cdef void deserialize(const device_resources& handle,
+                          const string& str,
+                          index[int8_t, int64_t]* index) except +
+
+    cdef void serialize_file(const device_resources& handle,
+                             const string& filename,
+                             const index[float, int64_t]& index) except +
+
+    cdef void deserialize_file(const device_resources& handle,
+                               const string& filename,
+                               index[float, int64_t]* index) except +
+
+    cdef void serialize_file(const device_resources& handle,
+                             const string& filename,
+                             const index[uint8_t, int64_t]& index) except +
+
+    cdef void deserialize_file(const device_resources& handle,
+                               const string& filename,
+                               index[uint8_t, int64_t]* index) except +
+
+    cdef void serialize_file(const device_resources& handle,
+                             const string& filename,
+                             const index[int8_t, int64_t]& index) except +
+
+    cdef void deserialize_file(const device_resources& handle,
+                               const string& filename,
+                               index[int8_t, int64_t]* index) except +
diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx
index 352376fe17..0e550547d3 100644
--- a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx
+++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx
@@ -708,3 +708,153 @@ def search(SearchParams search_params,
         raise ValueError("query dtype %s not supported" % queries_dt)
 
     return (distances, neighbors)
+
+
+@auto_sync_handle
+def save(filename, Index index, handle=None):
+    """
+    Saves the index to file.
+
+    Saving / loading the index is experimental. The serialization format is
+    subject to change.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file.
+    index : Index
+        Trained IVF-Flat index.
+    {handle_docstring}
+
+    Examples
+    --------
+    >>> import cupy as cp
+
+    >>> from pylibraft.common import DeviceResources
+    >>> from pylibraft.neighbors import ivf_flat
+
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+
+    >>> # Build index
+    >>> handle = DeviceResources()
+    >>> index = ivf_flat.build(ivf_flat.IndexParams(), dataset, handle=handle)
+    >>> ivf_flat.save("my_index.bin", index, handle=handle)
+    """
+    if not index.trained:
+        raise ValueError("Index need to be built before saving it.")
+
+    if handle is None:
+        handle = DeviceResources()
+    cdef device_resources* handle_ = \
+        <device_resources*><size_t>handle.getHandle()
+
+    cdef string c_filename = filename.encode('utf-8')
+
+    cdef IndexFloat idx_float
+    cdef IndexInt8 idx_int8
+    cdef IndexUint8 idx_uint8
+
+    if index.active_index_type == "float32":
+        idx_float = index
+        c_ivf_flat.serialize_file(
+            deref(handle_), c_filename, deref(idx_float.index))
+    elif index.active_index_type == "byte":
+        idx_int8 = index
+        c_ivf_flat.serialize_file(
+            deref(handle_), c_filename, deref(idx_int8.index))
+    elif index.active_index_type == "ubyte":
+        idx_uint8 = index
+        c_ivf_flat.serialize_file(
+            deref(handle_), c_filename, deref(idx_uint8.index))
+    else:
+        raise ValueError(
+            "Index dtype %s not supported" % index.active_index_type)
+
+
+@auto_sync_handle
+def load(filename, handle=None):
+    """
+    Loads index from file.
+
+    Saving / loading the index is experimental. The serialization format is
+    subject to change, therefore loading an index saved with a previous
+    version of raft is not guaranteed to work.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file.
+    {handle_docstring}
+
+    Returns
+    -------
+    index : Index
+
+    Examples
+    --------
+    >>> import cupy as cp
+
+    >>> from pylibraft.common import DeviceResources
+    >>> from pylibraft.neighbors import ivf_flat
+
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+
+    >>> # Build and save index
+    >>> handle = DeviceResources()
+    >>> index = ivf_flat.build(ivf_flat.IndexParams(), dataset, handle=handle)
+    >>> ivf_flat.save("my_index.bin", index, handle=handle)
+    >>> del index
+
+    >>> n_queries = 100
+    >>> queries = cp.random.random_sample((n_queries, n_features),
+    ...                                   dtype=cp.float32)
+    >>> handle = DeviceResources()
+    >>> index = ivf_flat.load("my_index.bin", handle=handle)
+
+    >>> distances, neighbors = ivf_flat.search(ivf_flat.SearchParams(), index,
+    ...                                        queries, k=10, handle=handle)
+    """
+    if handle is None:
+        handle = DeviceResources()
+    cdef device_resources* handle_ = \
+        <device_resources*><size_t>handle.getHandle()
+
+    cdef string c_filename = filename.encode('utf-8')
+    cdef IndexFloat idx_float
+    cdef IndexInt8 idx_int8
+    cdef IndexUint8 idx_uint8
+
+    with open(filename, 'rb') as f:
+        type_str = f.read(3).decode('utf-8')
+
+    dataset_dt = np.dtype(type_str)
+
+    if dataset_dt == np.float32:
+        idx_float = IndexFloat(handle)
+        c_ivf_flat.deserialize_file(
+            deref(handle_), c_filename, idx_float.index)
+        idx_float.trained = True
+        idx_float.active_index_type = 'float32'
+        return idx_float
+    elif dataset_dt == np.byte:
+        idx_int8 = IndexInt8(handle)
+        c_ivf_flat.deserialize_file(
+            deref(handle_), c_filename, idx_int8.index)
+        idx_int8.trained = True
+        idx_int8.active_index_type = 'byte'
+        return idx_int8
+    elif dataset_dt == np.ubyte:
+        idx_uint8 = IndexUint8(handle)
+        c_ivf_flat.deserialize_file(
+            deref(handle_), c_filename, idx_uint8.index)
+        idx_uint8.trained = True
+        idx_uint8.active_index_type = 'ubyte'
+        return idx_uint8
+    else:
+        raise ValueError("Index dtype %s not supported" % dataset_dt)
diff --git a/python/pylibraft/pylibraft/test/test_ivf_flat.py b/python/pylibraft/pylibraft/test/test_ivf_flat.py
index 593980f7c8..23140073f1 100644
--- a/python/pylibraft/pylibraft/test/test_ivf_flat.py
+++ b/python/pylibraft/pylibraft/test/test_ivf_flat.py
@@ -461,3 +461,50 @@ def test_search_inputs(params):
             out_idx_device,
             out_dist_device,
         )
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.ubyte])
+def test_save_load(dtype):
+    n_rows = 10000
+    n_cols = 50
+    n_queries = 1000
+
+    dataset = generate_data((n_rows, n_cols), dtype)
+    dataset_device = device_ndarray(dataset)
+
+    build_params = ivf_flat.IndexParams(n_lists=100, metric="sqeuclidean")
+    index = ivf_flat.build(build_params, dataset_device)
+
+    assert index.trained
+    filename = "my_index.bin"
+    ivf_flat.save(filename, index)
+    loaded_index = ivf_flat.load(filename)
+
+    assert index.metric == loaded_index.metric
+    assert index.n_lists == loaded_index.n_lists
+    assert index.dim == loaded_index.dim
+    assert index.adaptive_centers == loaded_index.adaptive_centers
+
+    queries = generate_data((n_queries, n_cols), dtype)
+
+    queries_device = device_ndarray(queries)
+    search_params = ivf_flat.SearchParams(n_probes=100)
+    k = 10
+
+    distance_dev, neighbors_dev = ivf_flat.search(
+        search_params, index, queries_device, k
+    )
+
+    neighbors = neighbors_dev.copy_to_host()
+    dist = distance_dev.copy_to_host()
+    del index
+
+    distance_dev, neighbors_dev = ivf_flat.search(
+        search_params, loaded_index, queries_device, k
+    )
+
+    neighbors2 = neighbors_dev.copy_to_host()
+    dist2 = distance_dev.copy_to_host()
+
+    assert np.all(neighbors == neighbors2)
+    assert np.allclose(dist, dist2, rtol=1e-6)

From 0154e8e73a4686b56d5f55784e17e1ae76f43161 Mon Sep 17 00:00:00 2001
From: Mahesh Doijade <36705640+mdoijade@users.noreply.github.com>
Date: Fri, 19 May 2023 22:29:17 +0530
Subject: [PATCH 67/78] Rename kernel arch finding function for dispatch
 (#1536)

-- as the kernel arch given by cudaFuncAttributes::ptxVersion depends on which archs the kernel was compiled for,
we should rename kernel_runtime_arch() to kernel_virtual_arch().
-- update the comments accordingly to reflect this.

Authors:
  - Mahesh Doijade (https://github.com/mdoijade)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1536
---
 cpp/include/raft/distance/detail/fused_l2_nn.cuh  |  7 ++++---
 .../detail/pairwise_matrix/dispatch-inl.cuh       |  7 ++++---
 cpp/include/raft/util/arch.cuh                    | 15 ++++++++-------
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh
index 2ff8fa7f1c..68922943f4 100644
--- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh
+++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh
@@ -325,12 +325,13 @@ void fusedL2NNImpl(OutT* min,
                                 decltype(distance_op),
                                 decltype(fin_op)>;
 
-  // Get pointer to fp32 SIMT kernel to determine the runtime architecture of the
-  // current system. Other methods to determine the architecture (that do not
+  // Get pointer to fp32 SIMT kernel to determine the best compute architecture
+  // out of all for which the kernel was compiled for that matches closely
+  // to the current device. Other methods to determine the architecture (that do not
   // require a pointer) can be error prone. See:
   // https://github.com/NVIDIA/cub/issues/545
   void* kernel_ptr   = reinterpret_cast<void*>(kernel);
-  auto runtime_arch  = arch::kernel_runtime_arch(kernel_ptr);
+  auto runtime_arch  = arch::kernel_virtual_arch(kernel_ptr);
   auto cutlass_range = arch::SM_range(arch::SM_80(), arch::SM_future());
 
   if (cutlass_range.contains(runtime_arch)) {
diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh
index bb4422735b..b768008c7f 100644
--- a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh
+++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-inl.cuh
@@ -108,13 +108,14 @@ void pairwise_matrix_dispatch(OpT distance_op,
     auto cutlass_range = arch::SM_range(arch::SM_80(), arch::SM_future());
     auto legacy_range  = arch::SM_range(arch::SM_min(), arch::SM_80());
 
-    // Get pointer to SM60 kernel to determine the runtime architecture of the
-    // current system. Other methods to determine the architecture (that do not
+    // Get pointer to SM60 kernel to determine the best compute architecture
+    // out of all for which the kernel was compiled for that matches closely
+    // to the current device. Other methods to determine the architecture (that do not
     // require a pointer) can be error prone. See:
     // https://github.com/NVIDIA/cub/issues/545
     auto sm60_wrapper = pairwise_matrix_sm60_get_wrapper(distance_op, params, legacy_range);
     void* kernel_ptr  = reinterpret_cast<void*>(sm60_wrapper.kernel_ptr);
-    auto runtime_arch = arch::kernel_runtime_arch(kernel_ptr);
+    auto runtime_arch = arch::kernel_virtual_arch(kernel_ptr);
 
     if (cutlass_range.contains(runtime_arch)) {
       // If device is SM_80 or later, use CUTLASS-based kernel.
diff --git a/cpp/include/raft/util/arch.cuh b/cpp/include/raft/util/arch.cuh
index dc35b10063..1a67eded44 100644
--- a/cpp/include/raft/util/arch.cuh
+++ b/cpp/include/raft/util/arch.cuh
@@ -30,10 +30,10 @@ namespace raft::util::arch {
  *   compute architecture that a kernel is compiled with. It can only be used
  *   inside kernels with a template argument.
  *
- * - raft::util::arch::kernel_runtime_arch : a function that computes at *run-time*
- *   which version of a kernel will launch (i.e., it will return the compute
- *   architecture of the version of the kernel that will be launched by the
- *   driver).
+ * - raft::util::arch::kernel_virtual_arch : a function that computes at *run-time*
+ *   which version of a kernel will launch (i.e., it will return the virtual compute
+ *   architecture of the version of the kernel that it was compiled for which
+ *   will be launched by the driver).
  *
  * - raft::util::arch::SM_range : a compile-time value to represent an open interval
  *   of compute architectures. This can be used to check if the current
@@ -97,7 +97,7 @@ struct SM_compute_arch {
 // compute architecture of the version of the kernel that the driver picks when
 // the kernel runs.
 struct SM_runtime {
-  friend SM_runtime kernel_runtime_arch(void*);
+  friend SM_runtime kernel_virtual_arch(void*);
 
  private:
   const int _version;
@@ -107,7 +107,8 @@ struct SM_runtime {
   __host__ __device__ int value() const { return _version; }
 };
 
-// Computes which compute architecture of a kernel will run
+// Computes which virtual compute architecture the given kernel was compiled for,
+// driver picks the version of the kernel that closely matches the current hardware.
 //
 // Semantics are described above in the documentation of SM_runtime.
 //
@@ -115,7 +116,7 @@ struct SM_runtime {
 // to determine the architecture (that do not require a pointer) can be error
 // prone. See:
 // https://github.com/NVIDIA/cub/issues/545
-inline SM_runtime kernel_runtime_arch(void* kernel)
+inline SM_runtime kernel_virtual_arch(void* kernel)
 {
   cudaFuncAttributes attributes;
   RAFT_CUDA_TRY(cudaFuncGetAttributes(&attributes, kernel));

From db96e8b5f8759ef7cb9bfd115e12ae827b013fd9 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <ben@benfrederickson.com>
Date: Fri, 19 May 2023 14:24:54 -0700
Subject: [PATCH 68/78] Check python brute-force knn inputs (#1537)

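The input validation code wasn't being triggered for the Python brute-force kNN API, causing invalid output when passed col-major inputs. This change runs the validation so that col-major inputs raise a ValueError instead of silently returning incorrect results.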

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1537
---
 .../pylibraft/common/device_ndarray.py        | 14 +++--------
 .../pylibraft/neighbors/brute_force.pyx       |  6 +++++
 .../pylibraft/test/test_brute_force.py        | 23 +++++++++++++++----
 .../pylibraft/pylibraft/test/test_handle.py   |  5 +---
 4 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/python/pylibraft/pylibraft/common/device_ndarray.py b/python/pylibraft/pylibraft/common/device_ndarray.py
index eebbca2f06..f267e0c644 100644
--- a/python/pylibraft/pylibraft/common/device_ndarray.py
+++ b/python/pylibraft/pylibraft/common/device_ndarray.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -89,12 +89,8 @@ def c_contiguous(self):
         """
         Is the current device_ndarray laid out in row-major format?
         """
-        array_interface = self.ndarray_.__array_interface__
         strides = self.strides
-        return (
-            strides is None
-            or array_interface["strides"][1] == self.dtype.itemsize
-        )
+        return strides is None or strides[1] == self.dtype.itemsize
 
     @property
     def f_contiguous(self):
@@ -125,11 +121,7 @@ def strides(self):
         Strides of the current device_ndarray instance
         """
         array_interface = self.ndarray_.__array_interface__
-        return (
-            None
-            if "strides" not in array_interface
-            else array_interface["strides"]
-        )
+        return array_interface.get("strides")
 
     @property
     def __cuda_array_interface__(self):
diff --git a/python/pylibraft/pylibraft/neighbors/brute_force.pyx b/python/pylibraft/pylibraft/neighbors/brute_force.pyx
index 8836307a5a..2d118072ab 100644
--- a/python/pylibraft/pylibraft/neighbors/brute_force.pyx
+++ b/python/pylibraft/pylibraft/neighbors/brute_force.pyx
@@ -47,6 +47,7 @@ from pylibraft.distance.distance_type cimport DistanceType
 # TODO: Centralize this
 
 from pylibraft.distance.pairwise_distance import DISTANCE_TYPES
+from pylibraft.neighbors.common import _check_input_array
 
 from pylibraft.common.cpp.mdspan cimport (
     device_matrix_view,
@@ -143,6 +144,11 @@ def knn(dataset, queries, k=None, indices=None, distances=None,
             raise ValueError("Argument k must be specified if both indices "
                              "and distances arg is None")
 
+    # we require c-contiguous (rowmajor) inputs here
+    _check_input_array(dataset_cai, [np.dtype("float32")])
+    _check_input_array(queries_cai, [np.dtype("float32")],
+                       exp_cols=dataset_cai.shape[1])
+
     n_queries = queries_cai.shape[0]
 
     if indices is None:
diff --git a/python/pylibraft/pylibraft/test/test_brute_force.py b/python/pylibraft/pylibraft/test/test_brute_force.py
index 0bd5e6eaaf..2e118d210d 100644
--- a/python/pylibraft/pylibraft/test/test_brute_force.py
+++ b/python/pylibraft/pylibraft/test/test_brute_force.py
@@ -40,11 +40,8 @@
     ],
 )
 @pytest.mark.parametrize("inplace", [True, False])
-@pytest.mark.parametrize("order", ["F", "C"])
 @pytest.mark.parametrize("dtype", [np.float32])
-def test_knn(
-    n_index_rows, n_query_rows, n_cols, k, inplace, metric, order, dtype
-):
+def test_knn(n_index_rows, n_query_rows, n_cols, k, inplace, metric, dtype):
     index = np.random.random_sample((n_index_rows, n_cols)).astype(dtype)
     queries = np.random.random_sample((n_query_rows, n_cols)).astype(dtype)
 
@@ -94,3 +91,21 @@ def test_knn(
         np.testing.assert_allclose(
             cpu_ordered[:k], gpu_dists, atol=1e-4, rtol=1e-4
         )
+
+
+def test_knn_check_col_major_inputs():
+    # make sure that we get an exception if passed col-major inputs,
+    # instead of returning incorrect results
+    cp = pytest.importorskip("cupy")
+    n_index_rows, n_query_rows, n_cols = 128, 16, 32
+    index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32")
+    queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32")
+
+    with pytest.raises(ValueError):
+        knn(cp.asarray(index, order="F"), queries, k=4)
+
+    with pytest.raises(ValueError):
+        knn(index, cp.asarray(queries, order="F"), k=4)
+
+    # shouldn't throw an exception with c-contiguous inputs
+    knn(index, queries, k=4)
diff --git a/python/pylibraft/pylibraft/test/test_handle.py b/python/pylibraft/pylibraft/test/test_handle.py
index ae519ea965..bb07df1000 100644
--- a/python/pylibraft/pylibraft/test/test_handle.py
+++ b/python/pylibraft/pylibraft/test/test_handle.py
@@ -19,10 +19,7 @@
 from pylibraft.common import DeviceResources, Stream, device_ndarray
 from pylibraft.distance import pairwise_distance
 
-try:
-    import cupy
-except ImportError:
-    pytest.skip(reason="cupy not installed.")
+cupy = pytest.importorskip("cupy")
 
 
 @pytest.mark.parametrize("stream", [cupy.cuda.Stream().ptr, Stream()])

From 1f61b471ea6b39e11b0d426ed1a623f6a4fd9bb5 Mon Sep 17 00:00:00 2001
From: tsuki <12711693+enp1s0@users.noreply.github.com>
Date: Sat, 20 May 2023 06:26:36 +0900
Subject: [PATCH 69/78] Support uint64_t in CAGRA index data type (#1514)

This PR updates CAGRA::search to support `uint64_t` in the index data type. This update is required to implement the RAFT ANN benchmark for CAGRA.

Authors:
  - tsuki (https://github.com/enp1s0)
  - Ben Frederickson (https://github.com/benfred)
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)

URL: https://github.com/rapidsai/raft/pull/1514
---
 cpp/include/raft/neighbors/cagra.cuh          |  59 ++++-
 .../neighbors/detail/cagra/cagra_build.cuh    |  10 +-
 .../neighbors/detail/cagra/cagra_search.cuh   |  36 ++-
 .../detail/cagra/compute_distance.hpp         |   8 +-
 .../neighbors/detail/cagra/graph_core.cuh     |  20 +-
 .../raft/neighbors/detail/cagra/hashmap.hpp   |  27 +-
 .../detail/cagra/search_multi_cta.cuh         |  45 ++--
 .../detail/cagra/search_multi_kernel.cuh      |  96 +++----
 .../neighbors/detail/cagra/search_plan.cuh    |   6 +-
 .../detail/cagra/search_single_cta.cuh        | 185 +++++++------
 .../detail/cagra/topk_for_cagra/topk.h        |  19 +-
 .../detail/cagra/topk_for_cagra/topk_core.cuh | 250 +++++++++---------
 .../raft/neighbors/detail/cagra/utils.hpp     |  10 +
 cpp/test/CMakeLists.txt                       |   1 +
 .../neighbors/ann_cagra/test_float_int64_t.cu |  29 ++
 .../ann_cagra/test_float_uint32_t.cu          |  12 +-
 .../ann_cagra/test_int8_t_uint32_t.cu         |  12 +-
 .../ann_cagra/test_uint8_t_uint32_t.cu        |  12 +-
 18 files changed, 486 insertions(+), 351 deletions(-)
 create mode 100644 cpp/test/neighbors/ann_cagra/test_float_int64_t.cu

diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh
index 19f65baf1a..9905f2abae 100644
--- a/cpp/include/raft/neighbors/cagra.cuh
+++ b/cpp/include/raft/neighbors/cagra.cuh
@@ -81,7 +81,17 @@ void build_knn_graph(raft::resources const& res,
                      std::optional<ivf_pq::index_params> build_params   = std::nullopt,
                      std::optional<ivf_pq::search_params> search_params = std::nullopt)
 {
-  detail::build_knn_graph(res, dataset, knn_graph, refine_rate, build_params, search_params);
+  using internal_IdxT = typename std::make_unsigned<IdxT>::type;
+
+  auto knn_graph_internal = make_host_matrix_view<internal_IdxT, internal_IdxT>(
+    reinterpret_cast<internal_IdxT*>(knn_graph.data_handle()),
+    knn_graph.extent(0),
+    knn_graph.extent(1));
+  auto dataset_internal = mdspan<const DataT, matrix_extent<internal_IdxT>, row_major, accessor>(
+    dataset.data_handle(), dataset.extent(0), dataset.extent(1));
+
+  detail::build_knn_graph(
+    res, dataset_internal, knn_graph_internal, refine_rate, build_params, search_params);
 }
 
 /**
@@ -124,7 +134,20 @@ void sort_knn_graph(raft::resources const& res,
                     mdspan<const DataT, matrix_extent<IdxT>, row_major, d_accessor> dataset,
                     mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph)
 {
-  detail::graph::sort_knn_graph(res, dataset, knn_graph);
+  using internal_IdxT = typename std::make_unsigned<IdxT>::type;
+
+  using g_accessor_internal =
+    host_device_accessor<std::experimental::default_accessor<internal_IdxT>, g_accessor::mem_type>;
+  auto knn_graph_internal =
+    mdspan<internal_IdxT, matrix_extent<internal_IdxT>, row_major, g_accessor_internal>(
+      reinterpret_cast<internal_IdxT*>(knn_graph.data_handle()),
+      knn_graph.extent(0),
+      knn_graph.extent(1));
+
+  auto dataset_internal = mdspan<const DataT, matrix_extent<internal_IdxT>, row_major, d_accessor>(
+    dataset.data_handle(), dataset.extent(0), dataset.extent(1));
+
+  detail::graph::sort_knn_graph(res, dataset_internal, knn_graph_internal);
 }
 
 /**
@@ -148,7 +171,22 @@ void prune(raft::resources const& res,
            mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph,
            raft::host_matrix_view<IdxT, IdxT, row_major> new_graph)
 {
-  detail::graph::prune(res, knn_graph, new_graph);
+  using internal_IdxT = typename std::make_unsigned<IdxT>::type;
+
+  auto new_graph_internal = raft::make_host_matrix_view<internal_IdxT, internal_IdxT>(
+    reinterpret_cast<internal_IdxT*>(new_graph.data_handle()),
+    new_graph.extent(0),
+    new_graph.extent(1));
+
+  using g_accessor_internal =
+    host_device_accessor<std::experimental::default_accessor<internal_IdxT>, memory_type::host>;
+  auto knn_graph_internal =
+    mdspan<internal_IdxT, matrix_extent<internal_IdxT>, row_major, g_accessor_internal>(
+      reinterpret_cast<internal_IdxT*>(knn_graph.data_handle()),
+      knn_graph.extent(0),
+      knn_graph.extent(1));
+
+  detail::graph::prune(res, knn_graph_internal, new_graph_internal);
 }
 
 /**
@@ -200,7 +238,7 @@ index<T, IdxT> build(raft::resources const& res,
                      mdspan<const T, matrix_extent<IdxT>, row_major, Accessor> dataset)
 {
   size_t degree = params.intermediate_graph_degree;
-  if (degree >= dataset.extent(0)) {
+  if (degree >= static_cast<size_t>(dataset.extent(0))) {
     RAFT_LOG_WARN(
       "Intermediate graph degree cannot be larger than dataset size, reducing it to %lu",
       dataset.extent(0));
@@ -256,7 +294,18 @@ void search(raft::resources const& res,
   RAFT_EXPECTS(queries.extent(1) == idx.dim(),
                "Number of query dimensions should equal number of dimensions in the index.");
 
-  detail::search_main(res, params, idx, queries, neighbors, distances);
+  using internal_IdxT   = typename std::make_unsigned<IdxT>::type;
+  auto queries_internal = raft::make_device_matrix_view<const T, internal_IdxT, row_major>(
+    queries.data_handle(), queries.extent(0), queries.extent(1));
+  auto neighbors_internal = raft::make_device_matrix_view<internal_IdxT, internal_IdxT, row_major>(
+    reinterpret_cast<internal_IdxT*>(neighbors.data_handle()),
+    neighbors.extent(0),
+    neighbors.extent(1));
+  auto distances_internal = raft::make_device_matrix_view<float, internal_IdxT, row_major>(
+    distances.data_handle(), distances.extent(0), distances.extent(1));
+
+  detail::search_main<T, internal_IdxT, IdxT>(
+    res, params, idx, queries_internal, neighbors_internal, distances_internal);
 }
 /** @} */  // end group cagra
 
diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
index f0eeb2b36c..d88aaa245a 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
@@ -38,8 +38,6 @@
 
 namespace raft::neighbors::experimental::cagra::detail {
 
-using INDEX_T = std::uint32_t;
-
 template <typename DataT, typename IdxT, typename accessor>
 void build_knn_graph(raft::resources const& res,
                      mdspan<const DataT, matrix_extent<IdxT>, row_major, accessor> dataset,
@@ -96,14 +94,14 @@ void build_knn_graph(raft::resources const& res,
   // search top (k + 1) neighbors
   //
   if (!search_params) {
-    search_params                          = ivf_pq::search_params{};
-    search_params->n_probes                = std::min(dataset.extent(1) * 2, build_params->n_lists);
-    search_params->lut_dtype               = CUDA_R_8U;
+    search_params            = ivf_pq::search_params{};
+    search_params->n_probes  = std::min<IdxT>(dataset.extent(1) * 2, build_params->n_lists);
+    search_params->lut_dtype = CUDA_R_8U;
     search_params->internal_distance_dtype = CUDA_R_32F;
   }
   const auto top_k          = node_degree + 1;
   uint32_t gpu_top_k        = node_degree * refine_rate.value_or(2.0f);
-  gpu_top_k                 = std::min(std::max(gpu_top_k, top_k), dataset.extent(0));
+  gpu_top_k                 = std::min<IdxT>(std::max(gpu_top_k, top_k), dataset.extent(0));
   const auto num_queries    = dataset.extent(0);
   const auto max_batch_size = 1024;
   RAFT_LOG_DEBUG(
diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
index 0073f66d0b..d3b24dc861 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
@@ -52,13 +52,13 @@ namespace raft::neighbors::experimental::cagra::detail {
  * k]
  */
 
-template <typename T, typename IdxT = uint32_t, typename DistanceT = float>
+template <typename T, typename internal_IdxT, typename IdxT = uint32_t, typename DistanceT = float>
 void search_main(raft::resources const& res,
                  search_params params,
                  const index<T, IdxT>& index,
-                 raft::device_matrix_view<const T, IdxT, row_major> queries,
-                 raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
-                 raft::device_matrix_view<DistanceT, IdxT, row_major> distances)
+                 raft::device_matrix_view<const T, internal_IdxT, row_major> queries,
+                 raft::device_matrix_view<internal_IdxT, internal_IdxT, row_major> neighbors,
+                 raft::device_matrix_view<DistanceT, internal_IdxT, row_major> distances)
 {
   RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n",
                  static_cast<size_t>(index.dataset().extent(0)),
@@ -69,8 +69,9 @@ void search_main(raft::resources const& res,
   RAFT_EXPECTS(queries.extent(1) == index.dim(), "Querise and index dim must match");
   uint32_t topk = neighbors.extent(1);
 
-  std::unique_ptr<search_plan_impl<T, IdxT, DistanceT>> plan =
-    factory<T, IdxT, DistanceT>::create(res, params, index.dim(), index.graph_degree(), topk);
+  std::unique_ptr<search_plan_impl<T, internal_IdxT, DistanceT>> plan =
+    factory<T, internal_IdxT, DistanceT>::create(
+      res, params, index.dim(), index.graph_degree(), topk);
 
   plan->check(neighbors.extent(1));
 
@@ -79,18 +80,29 @@ void search_main(raft::resources const& res,
   uint32_t query_dim   = queries.extent(1);
 
   for (unsigned qid = 0; qid < queries.extent(0); qid += max_queries) {
-    const uint32_t n_queries       = std::min<std::size_t>(max_queries, queries.extent(0) - qid);
-    IdxT* _topk_indices_ptr        = neighbors.data_handle() + (topk * qid);
+    const uint32_t n_queries = std::min<std::size_t>(max_queries, queries.extent(0) - qid);
+    internal_IdxT* _topk_indices_ptr =
+      reinterpret_cast<internal_IdxT*>(neighbors.data_handle()) + (topk * qid);
     DistanceT* _topk_distances_ptr = distances.data_handle() + (topk * qid);
     // todo(tfeher): one could keep distances optional and pass nullptr
     const T* _query_ptr = queries.data_handle() + (query_dim * qid);
-    const IdxT* _seed_ptr =
-      plan->num_seeds > 0 ? plan->dev_seed.data() + (plan->num_seeds * qid) : nullptr;
+    const internal_IdxT* _seed_ptr =
+      plan->num_seeds > 0
+        ? reinterpret_cast<const internal_IdxT*>(plan->dev_seed.data()) + (plan->num_seeds * qid)
+        : nullptr;
     uint32_t* _num_executed_iterations = nullptr;
 
+    auto dataset_internal = raft::make_device_matrix_view<const T, internal_IdxT, row_major>(
+      index.dataset().data_handle(), index.dataset().extent(0), index.dataset().extent(1));
+    auto graph_internal =
+      raft::make_device_matrix_view<const internal_IdxT, internal_IdxT, row_major>(
+        reinterpret_cast<const internal_IdxT*>(index.graph().data_handle()),
+        index.graph().extent(0),
+        index.graph().extent(1));
+
     (*plan)(res,
-            index.dataset(),
-            index.graph(),
+            dataset_internal,
+            graph_internal,
             _topk_indices_ptr,
             _topk_distances_ptr,
             _query_ptr,
diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
index 52e5c62169..fd66735cf6 100644
--- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
+++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp
@@ -59,9 +59,9 @@ _RAFT_DEVICE void compute_distance_to_random_nodes(
   const std::size_t num_pickup,
   const unsigned num_distilation,
   const uint64_t rand_xor_mask,
-  const INDEX_T* seed_ptr,  // [num_seeds]
+  const INDEX_T* const seed_ptr,  // [num_seeds]
   const uint32_t num_seeds,
-  uint32_t* const visited_hash_ptr,
+  INDEX_T* const visited_hash_ptr,
   const uint32_t hash_bitlen,
   const uint32_t block_id   = 0,
   const uint32_t num_blocks = 1)
@@ -79,7 +79,7 @@ _RAFT_DEVICE void compute_distance_to_random_nodes(
     DISTANCE_T best_norm2_team_local = utils::get_max_value<DISTANCE_T>();
     for (uint32_t j = 0; j < num_distilation; j++) {
       // Select a node randomly and compute the distance to it
-      uint32_t seed_index;
+      INDEX_T seed_index;
       DISTANCE_T norm2 = 0.0;
       if (valid_i) {
         // uint32_t gid = i + (num_pickup * (j + (num_distilation * block_id)));
@@ -150,7 +150,7 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in
                                                   const INDEX_T* const knn_graph,
                                                   const std::uint32_t knn_k,
                                                   // hashmap
-                                                  std::uint32_t* const visited_hashmap_ptr,
+                                                  INDEX_T* const visited_hashmap_ptr,
                                                   const std::uint32_t hash_bitlen,
                                                   const INDEX_T* const parent_indices,
                                                   const std::uint32_t num_parents)
diff --git a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
index aa3f7dd29f..feb9b76b2d 100644
--- a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
@@ -33,6 +33,8 @@
 
 #include <raft/util/cuda_rt_essentials.hpp>
 
+#include "utils.hpp"
+
 namespace raft::neighbors::experimental::cagra::detail {
 namespace graph {
 
@@ -115,7 +117,7 @@ __global__ void kern_sort(const DATA_T* const dataset,  // [dataset_chunk_size,
       my_vals[i] = smem_vals[k];
     } else {
       my_keys[i] = FLT_MAX;
-      my_vals[i] = ~static_cast<IdxT>(0);
+      my_vals[i] = utils::get_max_value<IdxT>();
     }
   }
   __syncthreads();
@@ -607,7 +609,7 @@ void prune(raft::resources const& res,
 
     memcpy(output_graph_ptr,
            pruned_graph.data_handle(),
-           sizeof(uint32_t) * graph_size * output_graph_degree);
+           sizeof(IdxT) * graph_size * output_graph_degree);
 
     constexpr int _omp_chunk = 1024;
 #pragma omp parallel for schedule(dynamic, _omp_chunk)
@@ -616,15 +618,15 @@ void prune(raft::resources const& res,
         uint64_t k = rev_graph_count.data_handle()[j] - 1 - _k;
         uint64_t i = rev_graph.data_handle()[k + (output_graph_degree * j)];
 
-        uint64_t pos = pos_in_array<uint32_t>(
-          i, output_graph_ptr + (output_graph_degree * j), output_graph_degree);
+        uint64_t pos =
+          pos_in_array<IdxT>(i, output_graph_ptr + (output_graph_degree * j), output_graph_degree);
         if (pos < num_protected_edges) { continue; }
         uint64_t num_shift = pos - num_protected_edges;
         if (pos == output_graph_degree) {
           num_shift = output_graph_degree - num_protected_edges - 1;
         }
-        shift_array<uint32_t>(output_graph_ptr + num_protected_edges + (output_graph_degree * j),
-                              num_shift);
+        shift_array<IdxT>(output_graph_ptr + num_protected_edges + (output_graph_degree * j),
+                          num_shift);
         output_graph_ptr[num_protected_edges + (output_graph_degree * j)] = i;
       }
       if ((omp_get_thread_num() == 0) && ((j % _omp_chunk) == 0)) {
@@ -641,9 +643,9 @@ void prune(raft::resources const& res,
 #pragma omp parallel for reduction(+ : num_replaced_edges)
     for (uint64_t i = 0; i < graph_size; i++) {
       for (uint64_t k = 0; k < output_graph_degree; k++) {
-        const uint64_t j   = pruned_graph.data_handle()[k + (output_graph_degree * i)];
-        const uint64_t pos = pos_in_array<uint32_t>(
-          j, output_graph_ptr + (output_graph_degree * i), output_graph_degree);
+        const uint64_t j = pruned_graph.data_handle()[k + (output_graph_degree * i)];
+        const uint64_t pos =
+          pos_in_array<IdxT>(j, output_graph_ptr + (output_graph_degree * i), output_graph_degree);
         if (pos == output_graph_degree) { num_replaced_edges += 1; }
       }
     }
diff --git a/cpp/include/raft/neighbors/detail/cagra/hashmap.hpp b/cpp/include/raft/neighbors/detail/cagra/hashmap.hpp
index 18f4006367..cd2c8ec491 100644
--- a/cpp/include/raft/neighbors/detail/cagra/hashmap.hpp
+++ b/cpp/include/raft/neighbors/detail/cagra/hashmap.hpp
@@ -27,32 +27,33 @@ namespace hashmap {
 
 _RAFT_HOST_DEVICE inline uint32_t get_size(const uint32_t bitlen) { return 1U << bitlen; }
 
-template <unsigned FIRST_TID = 0>
-_RAFT_DEVICE inline void init(uint32_t* table, const uint32_t bitlen)
+template <unsigned FIRST_TID = 0, class IdxT = void>
+_RAFT_DEVICE inline void init(IdxT* const table, const unsigned bitlen)
 {
   if (threadIdx.x < FIRST_TID) return;
   for (unsigned i = threadIdx.x - FIRST_TID; i < get_size(bitlen); i += blockDim.x - FIRST_TID) {
-    table[i] = utils::get_max_value<uint32_t>();
+    table[i] = utils::get_max_value<IdxT>();
   }
 }
 
-template <unsigned FIRST_TID, unsigned LAST_TID>
-_RAFT_DEVICE inline void init(uint32_t* table, const uint32_t bitlen)
+template <unsigned FIRST_TID, unsigned LAST_TID, class IdxT>
+_RAFT_DEVICE inline void init(IdxT* const table, const uint32_t bitlen)
 {
   if ((FIRST_TID > 0 && threadIdx.x < FIRST_TID) || threadIdx.x >= LAST_TID) return;
   for (unsigned i = threadIdx.x - FIRST_TID; i < get_size(bitlen); i += LAST_TID - FIRST_TID) {
-    table[i] = utils::get_max_value<uint32_t>();
+    table[i] = utils::get_max_value<IdxT>();
   }
 }
 
-_RAFT_DEVICE inline uint32_t insert(uint32_t* table, const uint32_t bitlen, const uint32_t key)
+template <class IdxT>
+_RAFT_DEVICE inline uint32_t insert(IdxT* const table, const uint32_t bitlen, const IdxT key)
 {
   // Open addressing is used for collision resolution
   const uint32_t size     = get_size(bitlen);
   const uint32_t bit_mask = size - 1;
 #if 1
   // Linear probing
-  uint32_t index            = (key ^ (key >> bitlen)) & bit_mask;
+  IdxT index                = (key ^ (key >> bitlen)) & bit_mask;
   constexpr uint32_t stride = 1;
 #else
   // Double hashing
@@ -60,8 +61,8 @@ _RAFT_DEVICE inline uint32_t insert(uint32_t* table, const uint32_t bitlen, cons
   const uint32_t stride = (key >> bitlen) * 2 + 1;
 #endif
   for (unsigned i = 0; i < size; i++) {
-    const uint32_t old = atomicCAS(&table[index], ~0u, key);
-    if (old == ~0u) {
+    const IdxT old = atomicCAS(&table[index], ~static_cast<IdxT>(0), key);
+    if (old == ~static_cast<IdxT>(0)) {
       return 1;
     } else if (old == key) {
       return 0;
@@ -71,10 +72,10 @@ _RAFT_DEVICE inline uint32_t insert(uint32_t* table, const uint32_t bitlen, cons
   return 0;
 }
 
-template <unsigned TEAM_SIZE>
-_RAFT_DEVICE inline uint32_t insert(uint32_t* table, const uint32_t bitlen, const uint32_t key)
+template <unsigned TEAM_SIZE, class IdxT>
+_RAFT_DEVICE inline uint32_t insert(IdxT* const table, const uint32_t bitlen, const IdxT key)
 {
-  uint32_t ret = 0;
+  IdxT ret = 0;
   if (threadIdx.x % TEAM_SIZE == 0) { ret = insert(table, bitlen, key); }
   for (unsigned offset = 1; offset < TEAM_SIZE; offset *= 2) {
     ret |= __shfl_xor_sync(0xffffffff, ret, offset);
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
index 4cccc36a23..f9a0fef2fe 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh
@@ -52,7 +52,8 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices,  // [num
                                     const size_t num_itopk,
                                     uint32_t* const terminate_flag)
 {
-  const unsigned warp_id = threadIdx.x / 32;
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
+  const unsigned warp_id             = threadIdx.x / 32;
   if (warp_id > 0) { return; }
   const unsigned lane_id = threadIdx.x % 32;
   for (uint32_t i = lane_id; i < num_parents; i += 32) {
@@ -66,7 +67,7 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices,  // [num
     int new_parent = 0;
     if (j < num_itopk) {
       index = itopk_indices[j];
-      if ((index & 0x80000000) == 0) {  // check if most significant bit is set
+      if ((index & index_msb_1_mask) == 0) {  // check if most significant bit is set
         new_parent = 1;
       }
     }
@@ -75,7 +76,7 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices,  // [num
       const auto i = __popc(ballot_mask & ((1 << lane_id) - 1)) + num_new_parents;
       if (i < num_parents) {
         next_parent_indices[i] = index;
-        itopk_indices[j] |= 0x80000000;  // set most significant bit as used node
+        itopk_indices[j] |= index_msb_1_mask;  // set most significant bit as used node
       }
     }
     num_new_parents += __popc(ballot_mask);
@@ -84,9 +85,9 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices,  // [num
   if (threadIdx.x == 0 && (num_new_parents == 0)) { *terminate_flag = 1; }
 }
 
-template <unsigned MAX_ELEMENTS>
+template <unsigned MAX_ELEMENTS, class INDEX_T>
 __device__ inline void topk_by_bitonic_sort(float* distances,         // [num_elements]
-                                            uint32_t* indices,        // [num_elements]
+                                            INDEX_T* indices,         // [num_elements]
                                             const uint32_t num_elements,
                                             const uint32_t num_itopk  // num_itopk <= num_elements
 )
@@ -96,7 +97,7 @@ __device__ inline void topk_by_bitonic_sort(float* distances,         // [num_el
   const unsigned lane_id = threadIdx.x % 32;
   constexpr unsigned N   = (MAX_ELEMENTS + 31) / 32;
   float key[N];
-  uint32_t val[N];
+  INDEX_T val[N];
   for (unsigned i = 0; i < N; i++) {
     unsigned j = lane_id + (32 * i);
     if (j < num_elements) {
@@ -104,11 +105,11 @@ __device__ inline void topk_by_bitonic_sort(float* distances,         // [num_el
       val[i] = indices[j];
     } else {
       key[i] = utils::get_max_value<float>();
-      val[i] = utils::get_max_value<uint32_t>();
+      val[i] = utils::get_max_value<INDEX_T>();
     }
   }
   /* Warp Sort */
-  bitonic::warp_sort<float, uint32_t, N>(key, val);
+  bitonic::warp_sort<float, INDEX_T, N>(key, val);
   /* Store itopk sorted results */
   for (unsigned i = 0; i < N; i++) {
     unsigned j = (N * lane_id) + i;
@@ -142,9 +143,9 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
   const uint32_t graph_degree,
   const unsigned num_distilation,
   const uint64_t rand_xor_mask,
-  const INDEX_T* seed_ptr,              // [num_queries, num_seeds]
+  const INDEX_T* seed_ptr,             // [num_queries, num_seeds]
   const uint32_t num_seeds,
-  uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
+  INDEX_T* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
   const uint32_t hash_bitlen,
   const uint32_t itopk_size,
   const uint32_t num_parents,
@@ -194,7 +195,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
   auto result_distances_buffer =
     reinterpret_cast<DISTANCE_T*>(result_indices_buffer + result_buffer_size_32);
   auto parent_indices_buffer =
-    reinterpret_cast<uint32_t*>(result_distances_buffer + result_buffer_size_32);
+    reinterpret_cast<INDEX_T*>(result_distances_buffer + result_buffer_size_32);
   auto terminate_flag = reinterpret_cast<uint32_t*>(parent_indices_buffer + num_parents);
 
 #if 0
@@ -215,7 +216,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
     }
   }
   if (threadIdx.x == 0) { terminate_flag[0] = 0; }
-  uint32_t* local_visited_hashmap_ptr =
+  INDEX_T* const local_visited_hashmap_ptr =
     visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * query_id);
   __syncthreads();
   _CLK_REC(clk_init);
@@ -246,10 +247,10 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
   while (1) {
     // topk with bitonic sort
     _CLK_START();
-    topk_by_bitonic_sort<MAX_ELEMENTS>(result_distances_buffer,
-                                       result_indices_buffer,
-                                       itopk_size + (num_parents * graph_degree),
-                                       itopk_size);
+    topk_by_bitonic_sort<MAX_ELEMENTS, INDEX_T>(result_distances_buffer,
+                                                result_indices_buffer,
+                                                itopk_size + (num_parents * graph_degree),
+                                                itopk_size);
     _CLK_REC(clk_topk);
 
     if (iter + 1 == max_iteration) {
@@ -292,7 +293,11 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
   for (uint32_t i = threadIdx.x; i < itopk_size; i += BLOCK_SIZE) {
     uint32_t j = i + (itopk_size * (cta_id + (num_cta_per_query * query_id)));
     if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[i]; }
-    result_indices_ptr[j] = result_indices_buffer[i] & ~0x80000000;  // clear most significant bit
+
+    constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
+
+    result_indices_ptr[j] =
+      result_indices_buffer[i] & ~index_msb_1_mask;  // clear most significant bit
   }
 
   if (threadIdx.x == 0 && cta_id == 0 && num_executed_iterations != nullptr) {
@@ -368,7 +373,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(
                                   const uint64_t rand_xor_mask,             \
                                   const INDEX_T* seed_ptr,                  \
                                   const uint32_t num_seeds,                 \
-                                  uint32_t* const visited_hashmap_ptr,      \
+                                  INDEX_T* const visited_hashmap_ptr,       \
                                   const uint32_t hash_bitlen,               \
                                   const uint32_t itopk_size,                \
                                   const uint32_t num_parents,               \
@@ -456,7 +461,7 @@ struct search : public search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
   using search_plan_impl<DATA_T, INDEX_T, DISTANCE_T>::num_seeds;
 
   uint32_t num_cta_per_query;
-  rmm::device_uvector<uint32_t> intermediate_indices;
+  rmm::device_uvector<INDEX_T> intermediate_indices;
   rmm::device_uvector<float> intermediate_distances;
   size_t topk_workspace_size;
   rmm::device_uvector<uint32_t> topk_workspace;
@@ -583,7 +588,7 @@ struct search : public search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
     // Initialize hash table
     const uint32_t hash_size = hashmap::get_size(hash_bitlen);
     set_value_batch(
-      hashmap.data(), hash_size, utils::get_max_value<uint32_t>(), hash_size, num_queries, stream);
+      hashmap.data(), hash_size, utils::get_max_value<INDEX_T>(), hash_size, num_queries, stream);
 
     dim3 block_dims(block_size, 1, 1);
     dim3 grid_dims(num_cta_per_query, num_queries, 1);
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
index 439ebd563b..8fbd5d8f03 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh
@@ -97,12 +97,12 @@ __global__ void random_pickup_kernel(
   const std::size_t num_pickup,
   const unsigned num_distilation,
   const uint64_t rand_xor_mask,
-  const INDEX_T* seed_ptr,                   // [num_queries, num_seeds]
+  const INDEX_T* seed_ptr,                 // [num_queries, num_seeds]
   const uint32_t num_seeds,
-  INDEX_T* const result_indices_ptr,         // [num_queries, ldr]
-  DISTANCE_T* const result_distances_ptr,    // [num_queries, ldr]
-  const std::uint32_t ldr,                   // (*) ldr >= num_pickup
-  std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << bitlen]
+  INDEX_T* const result_indices_ptr,       // [num_queries, ldr]
+  DISTANCE_T* const result_distances_ptr,  // [num_queries, ldr]
+  const std::uint32_t ldr,                 // (*) ldr >= num_pickup
+  INDEX_T* const visited_hashmap_ptr,      // [num_queries, 1 << bitlen]
   const std::uint32_t hash_bitlen)
 {
   const auto ldb               = hashmap::get_size(hash_bitlen);
@@ -168,12 +168,12 @@ void random_pickup(const DATA_T* const dataset_ptr,  // [dataset_size, dataset_d
                    const std::size_t num_pickup,
                    const unsigned num_distilation,
                    const uint64_t rand_xor_mask,
-                   const INDEX_T* seed_ptr,                   // [num_queries, num_seeds]
+                   const INDEX_T* seed_ptr,                 // [num_queries, num_seeds]
                    const uint32_t num_seeds,
-                   INDEX_T* const result_indices_ptr,         // [num_queries, ldr]
-                   DISTANCE_T* const result_distances_ptr,    // [num_queries, ldr]
-                   const std::size_t ldr,                     // (*) ldr >= num_pickup
-                   std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << bitlen]
+                   INDEX_T* const result_indices_ptr,       // [num_queries, ldr]
+                   DISTANCE_T* const result_distances_ptr,  // [num_queries, ldr]
+                   const std::size_t ldr,                   // (*) ldr >= num_pickup
+                   INDEX_T* const visited_hashmap_ptr,      // [num_queries, 1 << bitlen]
                    const std::uint32_t hash_bitlen,
                    cudaStream_t const cuda_stream = 0)
 {
@@ -204,7 +204,7 @@ __global__ void pickup_next_parents_kernel(
   INDEX_T* const parent_candidates_ptr,        // [num_queries, lds]
   const std::size_t lds,                       // (*) lds >= parent_candidates_size
   const std::uint32_t parent_candidates_size,  //
-  std::uint32_t* const visited_hashmap_ptr,    // [num_queries, 1 << hash_bitlen]
+  INDEX_T* const visited_hashmap_ptr,          // [num_queries, 1 << hash_bitlen]
   const std::size_t hash_bitlen,
   const std::uint32_t small_hash_bitlen,
   INDEX_T* const parent_list_ptr,      // [num_queries, ldd]
@@ -212,6 +212,8 @@ __global__ void pickup_next_parents_kernel(
   const std::size_t parent_list_size,  //
   std::uint32_t* const terminate_flag)
 {
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
+
   const std::size_t ldb   = hashmap::get_size(hash_bitlen);
   const uint32_t query_id = blockIdx.x;
   if (threadIdx.x < 32) {
@@ -229,7 +231,7 @@ __global__ void pickup_next_parents_kernel(
       int new_parent = 0;
       if (j < parent_candidates_size) {
         index = parent_candidates_ptr[j + (lds * query_id)];
-        if ((index & 0x80000000) == 0) {  // check most significant bit
+        if ((index & index_msb_1_mask) == 0) {  // check most significant bit
           new_parent = 1;
         }
       }
@@ -239,7 +241,7 @@ __global__ void pickup_next_parents_kernel(
         if (i < parent_list_size) {
           parent_list_ptr[i + (ldd * query_id)] = index;
           parent_candidates_ptr[j + (lds * query_id)] |=
-            0x80000000;  // set most significant bit as used node
+            index_msb_1_mask;  // set most significant bit as used node
         }
       }
       num_new_parents += __popc(ballot_mask);
@@ -255,27 +257,26 @@ __global__ void pickup_next_parents_kernel(
     __syncthreads();
     // insert internal-topk indices into small-hash
     for (unsigned i = threadIdx.x; i < parent_candidates_size; i += blockDim.x) {
-      auto key =
-        parent_candidates_ptr[i + (lds * query_id)] & ~0x80000000;  // clear most significant bit
+      auto key = parent_candidates_ptr[i + (lds * query_id)] &
+                 ~index_msb_1_mask;  // clear most significant bit
       hashmap::insert(visited_hashmap_ptr + (ldb * query_id), hash_bitlen, key);
     }
   }
 }
 
 template <class INDEX_T>
-void pickup_next_parents(
-  INDEX_T* const parent_candidates_ptr,      // [num_queries, lds]
-  const std::size_t lds,                     // (*) lds >= parent_candidates_size
-  const std::size_t parent_candidates_size,  //
-  const std::size_t num_queries,
-  std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
-  const std::size_t hash_bitlen,
-  const std::size_t small_hash_bitlen,
-  INDEX_T* const parent_list_ptr,      // [num_queries, ldd]
-  const std::size_t ldd,               // (*) ldd >= parent_list_size
-  const std::size_t parent_list_size,  //
-  std::uint32_t* const terminate_flag,
-  cudaStream_t cuda_stream = 0)
+void pickup_next_parents(INDEX_T* const parent_candidates_ptr,  // [num_queries, lds]
+                         const std::size_t lds,                 // (*) lds >= parent_candidates_size
+                         const std::size_t parent_candidates_size,  //
+                         const std::size_t num_queries,
+                         INDEX_T* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
+                         const std::size_t hash_bitlen,
+                         const std::size_t small_hash_bitlen,
+                         INDEX_T* const parent_list_ptr,      // [num_queries, ldd]
+                         const std::size_t ldd,               // (*) ldd >= parent_list_size
+                         const std::size_t parent_list_size,  //
+                         std::uint32_t* const terminate_flag,
+                         cudaStream_t cuda_stream = 0)
 {
   std::uint32_t block_size = 32;
   if (small_hash_bitlen) {
@@ -309,14 +310,14 @@ __global__ void compute_distance_to_child_nodes_kernel(
   const DATA_T* const dataset_ptr,        // [dataset_size, data_dim]
   const std::uint32_t data_dim,
   const std::uint32_t dataset_size,
-  const INDEX_T* const neighbor_graph_ptr,   // [dataset_size, graph_degree]
+  const INDEX_T* const neighbor_graph_ptr,  // [dataset_size, graph_degree]
   const std::uint32_t graph_degree,
-  const DATA_T* query_ptr,                   // [num_queries, data_dim]
-  std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
+  const DATA_T* query_ptr,                  // [num_queries, data_dim]
+  INDEX_T* const visited_hashmap_ptr,       // [num_queries, 1 << hash_bitlen]
   const std::uint32_t hash_bitlen,
-  INDEX_T* const result_indices_ptr,         // [num_queries, ldd]
-  DISTANCE_T* const result_distances_ptr,    // [num_queries, ldd]
-  const std::uint32_t ldd                    // (*) ldd >= num_parents * graph_degree
+  INDEX_T* const result_indices_ptr,        // [num_queries, ldd]
+  DISTANCE_T* const result_distances_ptr,   // [num_queries, ldd]
+  const std::uint32_t ldd                   // (*) ldd >= num_parents * graph_degree
 )
 {
   const uint32_t ldb        = hashmap::get_size(hash_bitlen);
@@ -334,7 +335,8 @@ __global__ void compute_distance_to_child_nodes_kernel(
 
   const std::size_t child_id = neighbor_list_head_ptr[global_team_id % graph_degree];
 
-  if (hashmap::insert<TEAM_SIZE>(visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id)) {
+  if (hashmap::insert<TEAM_SIZE, INDEX_T>(
+        visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id)) {
     device::fragment<MAX_DATASET_DIM, DATA_T, TEAM_SIZE> frag_target;
     device::load_vector_sync(frag_target, dataset_ptr + (data_dim * child_id), data_dim);
 
@@ -368,15 +370,15 @@ void compute_distance_to_child_nodes(
   const DATA_T* const dataset_ptr,        // [dataset_size, data_dim]
   const std::uint32_t data_dim,
   const std::uint32_t dataset_size,
-  const INDEX_T* const neighbor_graph_ptr,   // [dataset_size, graph_degree]
+  const INDEX_T* const neighbor_graph_ptr,  // [dataset_size, graph_degree]
   const std::uint32_t graph_degree,
-  const DATA_T* query_ptr,                   // [num_queries, data_dim]
+  const DATA_T* query_ptr,                  // [num_queries, data_dim]
   const std::uint32_t num_queries,
-  std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
+  INDEX_T* const visited_hashmap_ptr,       // [num_queries, 1 << hash_bitlen]
   const std::uint32_t hash_bitlen,
-  INDEX_T* const result_indices_ptr,         // [num_queries, ldd]
-  DISTANCE_T* const result_distances_ptr,    // [num_queries, ldd]
-  const std::uint32_t ldd,                   // (*) ldd >= num_parents * graph_degree
+  INDEX_T* const result_indices_ptr,        // [num_queries, ldd]
+  DISTANCE_T* const result_distances_ptr,   // [num_queries, ldd]
+  const std::uint32_t ldd,                  // (*) ldd >= num_parents * graph_degree
   cudaStream_t cuda_stream = 0)
 {
   const auto block_size = 128;
@@ -405,11 +407,13 @@ __global__ void remove_parent_bit_kernel(const std::uint32_t num_queries,
                                          INDEX_T* const topk_indices_ptr,  // [ld, num_queries]
                                          const std::uint32_t ld)
 {
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
+
   uint32_t i_query = blockIdx.x;
   if (i_query >= num_queries) return;
 
   for (unsigned i = threadIdx.x; i < num_topk; i += blockDim.x) {
-    topk_indices_ptr[i + (ld * i_query)] &= ~0x80000000;  // clear most significant bit
+    topk_indices_ptr[i + (ld * i_query)] &= ~index_msb_1_mask;  // clear most significant bit
   }
 }
 
@@ -537,9 +541,9 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
   using search_plan_impl<DATA_T, INDEX_T, DISTANCE_T>::num_seeds;
 
   size_t result_buffer_allocation_size;
-  rmm::device_uvector<uint32_t> result_indices;  // results_indices_buffer
-  rmm::device_uvector<float> result_distances;   // result_distances_buffer
-  rmm::device_uvector<uint32_t> parent_node_list;
+  rmm::device_uvector<INDEX_T> result_indices;  // results_indices_buffer
+  rmm::device_uvector<float> result_distances;  // result_distances_buffer
+  rmm::device_uvector<INDEX_T> parent_node_list;
   rmm::device_uvector<uint32_t> topk_hint;
   rmm::device_scalar<uint32_t> terminate_flag;  // dev_terminate_flag, host_terminate_flag.;
   rmm::device_uvector<uint32_t> topk_workspace;
@@ -600,7 +604,7 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
     cudaStream_t stream      = resource::get_cuda_stream(res);
     const uint32_t hash_size = hashmap::get_size(hash_bitlen);
     set_value_batch(
-      hashmap.data(), hash_size, utils::get_max_value<uint32_t>(), hash_size, num_queries, stream);
+      hashmap.data(), hash_size, utils::get_max_value<INDEX_T>(), hash_size, num_queries, stream);
     // Init topk_hint
     if (topk_hint.size() > 0) { set_value(topk_hint.data(), 0xffffffffu, num_queries, stream); }
 
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
index b573d7d7ca..3bed100a70 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
@@ -81,9 +81,9 @@ struct search_plan_impl : public search_plan_impl_base {
   uint32_t topk;
   uint32_t num_seeds;
 
-  rmm::device_uvector<uint32_t> hashmap;
+  rmm::device_uvector<INDEX_T> hashmap;
   rmm::device_uvector<uint32_t> num_executed_iterations;  // device or managed?
-  rmm::device_uvector<uint32_t> dev_seed;                 // IdxT
+  rmm::device_uvector<INDEX_T> dev_seed;
 
   search_plan_impl(raft::resources const& res,
                    search_params params,
@@ -243,7 +243,7 @@ struct search_plan_impl : public search_plan_impl_base {
     if (small_hash_bitlen > 0) {
       RAFT_LOG_DEBUG("# small_hash_reset_interval = %lu", small_hash_reset_interval);
     }
-    hashmap_size = sizeof(std::uint32_t) * max_queries * hashmap::get_size(hash_bitlen);
+    hashmap_size = sizeof(INDEX_T) * max_queries * hashmap::get_size(hash_bitlen);
     RAFT_LOG_DEBUG("# hashmap size: %lu", hashmap_size);
     if (hashmap_size >= 1024 * 1024 * 1024) {
       RAFT_LOG_DEBUG(" (%.2f GiB)", (double)hashmap_size / (1024 * 1024 * 1024));
diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
index d64afb0d11..9400a16c36 100644
--- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh
@@ -53,6 +53,7 @@ __device__ void pickup_next_parents(std::uint32_t* const terminate_flag,
                                     const std::size_t dataset_size,
                                     const std::uint32_t num_parents)
 {
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
   // if (threadIdx.x >= 32) return;
 
   for (std::uint32_t i = threadIdx.x; i < num_parents; i += 32) {
@@ -68,7 +69,7 @@ __device__ void pickup_next_parents(std::uint32_t* const terminate_flag,
     int new_parent = 0;
     if (j < internal_topk_size) {
       index = internal_topk_indices[jj];
-      if ((index & 0x80000000) == 0) {  // check if most significant bit is set
+      if ((index & index_msb_1_mask) == 0) {  // check if most significant bit is set
         new_parent = 1;
       }
     }
@@ -78,7 +79,7 @@ __device__ void pickup_next_parents(std::uint32_t* const terminate_flag,
       if (i < num_parents) {
         next_parent_indices[i] = index;
         // set most significant bit as used node
-        internal_topk_indices[jj] |= 0x80000000;
+        internal_topk_indices[jj] |= index_msb_1_mask;
       }
     }
     num_new_parents += __popc(ballot_mask);
@@ -93,49 +94,52 @@ struct topk_by_radix_sort_base {
   static constexpr std::uint32_t state_bit_lenght = 0;
   static constexpr std::uint32_t vecLen           = 2;  // TODO
 };
-template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE, class = void>
+template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE, class IdxT, class = void>
 struct topk_by_radix_sort : topk_by_radix_sort_base<MAX_INTERNAL_TOPK> {};
 
-template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE>
+template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE, class IdxT>
 struct topk_by_radix_sort<MAX_INTERNAL_TOPK,
                           BLOCK_SIZE,
+                          IdxT,
                           std::enable_if_t<((MAX_INTERNAL_TOPK <= 64))>>
   : topk_by_radix_sort_base<MAX_INTERNAL_TOPK> {
   __device__ void operator()(uint32_t topk,
                              uint32_t batch_size,
                              uint32_t len_x,
                              const uint32_t* _x,
-                             const uint32_t* _in_vals,
+                             const IdxT* _in_vals,
                              uint32_t* _y,
-                             uint32_t* _out_vals,
+                             IdxT* _out_vals,
                              uint32_t* work,
                              uint32_t* _hints,
                              bool sort,
                              uint32_t* _smem)
   {
-    std::uint8_t* state = (std::uint8_t*)work;
+    std::uint8_t* const state = reinterpret_cast<std::uint8_t*>(work);
     topk_cta_11_core<BLOCK_SIZE,
                      topk_by_radix_sort_base<MAX_INTERNAL_TOPK>::state_bit_lenght,
                      topk_by_radix_sort_base<MAX_INTERNAL_TOPK>::vecLen,
                      64,
-                     32>(topk, len_x, _x, _in_vals, _y, _out_vals, state, _hints, sort, _smem);
+                     32,
+                     IdxT>(topk, len_x, _x, _in_vals, _y, _out_vals, state, _hints, sort, _smem);
   }
 };
 
 #define TOP_FUNC_PARTIAL_SPECIALIZATION(V)                                           \
-  template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE>                         \
+  template <unsigned MAX_INTERNAL_TOPK, unsigned BLOCK_SIZE, class IdxT>             \
   struct topk_by_radix_sort<                                                         \
     MAX_INTERNAL_TOPK,                                                               \
     BLOCK_SIZE,                                                                      \
+    IdxT,                                                                            \
     std::enable_if_t<((MAX_INTERNAL_TOPK <= V) && (2 * MAX_INTERNAL_TOPK > V))>>     \
     : topk_by_radix_sort_base<MAX_INTERNAL_TOPK> {                                   \
     __device__ void operator()(uint32_t topk,                                        \
                                uint32_t batch_size,                                  \
                                uint32_t len_x,                                       \
                                const uint32_t* _x,                                   \
-                               const uint32_t* _in_vals,                             \
+                               const IdxT* _in_vals,                                 \
                                uint32_t* _y,                                         \
-                               uint32_t* _out_vals,                                  \
+                               IdxT* _out_vals,                                      \
                                uint32_t* work,                                       \
                                uint32_t* _hints,                                     \
                                bool sort,                                            \
@@ -147,7 +151,8 @@ struct topk_by_radix_sort<MAX_INTERNAL_TOPK,
                        topk_by_radix_sort_base<MAX_INTERNAL_TOPK>::state_bit_lenght, \
                        topk_by_radix_sort_base<MAX_INTERNAL_TOPK>::vecLen,           \
                        V,                                                            \
-                       V / 4>(                                                       \
+                       V / 4,                                                        \
+                       IdxT>(                                                        \
         topk, len_x, _x, _in_vals, _y, _out_vals, state, _hints, sort, _smem);       \
     }                                                                                \
   };
@@ -156,12 +161,11 @@ TOP_FUNC_PARTIAL_SPECIALIZATION(256);
 TOP_FUNC_PARTIAL_SPECIALIZATION(512);
 TOP_FUNC_PARTIAL_SPECIALIZATION(1024);
 
-template <unsigned MAX_CANDIDATES, unsigned MULTI_WARPS = 0>
-__device__ inline void topk_by_bitonic_sort_1st(
-  float* candidate_distances,        // [num_candidates]
-  std::uint32_t* candidate_indices,  // [num_candidates]
-  const std::uint32_t num_candidates,
-  const std::uint32_t num_itopk)
+template <unsigned MAX_CANDIDATES, unsigned MULTI_WARPS = 0, class IdxT = void>
+__device__ inline void topk_by_bitonic_sort_1st(float* candidate_distances,  // [num_candidates]
+                                                IdxT* candidate_indices,     // [num_candidates]
+                                                const std::uint32_t num_candidates,
+                                                const std::uint32_t num_itopk)
 {
   const unsigned lane_id = threadIdx.x % 32;
   const unsigned warp_id = threadIdx.x / 32;
@@ -169,7 +173,7 @@ __device__ inline void topk_by_bitonic_sort_1st(
     if (warp_id > 0) { return; }
     constexpr unsigned N = (MAX_CANDIDATES + 31) / 32;
     float key[N];
-    std::uint32_t val[N];
+    IdxT val[N];
     /* Candidates -> Reg */
     for (unsigned i = 0; i < N; i++) {
       unsigned j = lane_id + (32 * i);
@@ -178,11 +182,11 @@ __device__ inline void topk_by_bitonic_sort_1st(
         val[i] = candidate_indices[j];
       } else {
         key[i] = utils::get_max_value<float>();
-        val[i] = utils::get_max_value<std::uint32_t>();
+        val[i] = utils::get_max_value<IdxT>();
       }
     }
     /* Sort */
-    bitonic::warp_sort<float, std::uint32_t, N>(key, val);
+    bitonic::warp_sort<float, IdxT, N>(key, val);
     /* Reg -> Temp_itopk */
     for (unsigned i = 0; i < N; i++) {
       unsigned j = (N * lane_id) + i;
@@ -196,7 +200,7 @@ __device__ inline void topk_by_bitonic_sort_1st(
     constexpr unsigned max_candidates_per_warp = (MAX_CANDIDATES + 1) / 2;
     constexpr unsigned N                       = (max_candidates_per_warp + 31) / 32;
     float key[N];
-    std::uint32_t val[N];
+    IdxT val[N];
     if (warp_id < 2) {
       /* Candidates -> Reg */
       for (unsigned i = 0; i < N; i++) {
@@ -207,11 +211,11 @@ __device__ inline void topk_by_bitonic_sort_1st(
           val[i] = candidate_indices[j];
         } else {
           key[i] = utils::get_max_value<float>();
-          val[i] = utils::get_max_value<std::uint32_t>();
+          val[i] = utils::get_max_value<IdxT>();
         }
       }
       /* Sort */
-      bitonic::warp_sort<float, std::uint32_t, N>(key, val);
+      bitonic::warp_sort<float, IdxT, N>(key, val);
       /* Reg -> Temp_candidates */
       for (unsigned i = 0; i < N; i++) {
         unsigned jl = (N * lane_id) + i;
@@ -244,7 +248,7 @@ __device__ inline void topk_by_bitonic_sort_1st(
     if (num_warps_used > 1) { __syncthreads(); }
     if (warp_id < num_warps_used) {
       /* Merge */
-      bitonic::warp_merge<float, std::uint32_t, N>(key, val, 32);
+      bitonic::warp_merge<float, IdxT, N>(key, val, 32);
       /* Reg -> Temp_itopk */
       for (unsigned i = 0; i < N; i++) {
         unsigned jl = (N * lane_id) + i;
@@ -259,16 +263,15 @@ __device__ inline void topk_by_bitonic_sort_1st(
   }
 }
 
-template <unsigned MAX_ITOPK, unsigned MULTI_WARPS = 0>
-__device__ inline void topk_by_bitonic_sort_2nd(
-  float* itopk_distances,            // [num_itopk]
-  std::uint32_t* itopk_indices,      // [num_itopk]
-  const std::uint32_t num_itopk,
-  float* candidate_distances,        // [num_candidates]
-  std::uint32_t* candidate_indices,  // [num_candidates]
-  const std::uint32_t num_candidates,
-  std::uint32_t* work_buf,
-  const bool first)
+template <unsigned MAX_ITOPK, unsigned MULTI_WARPS = 0, class IdxT = void>
+__device__ inline void topk_by_bitonic_sort_2nd(float* itopk_distances,      // [num_itopk]
+                                                IdxT* itopk_indices,         // [num_itopk]
+                                                const std::uint32_t num_itopk,
+                                                float* candidate_distances,  // [num_candidates]
+                                                IdxT* candidate_indices,     // [num_candidates]
+                                                const std::uint32_t num_candidates,
+                                                std::uint32_t* work_buf,
+                                                const bool first)
 {
   const unsigned lane_id = threadIdx.x % 32;
   const unsigned warp_id = threadIdx.x / 32;
@@ -276,7 +279,7 @@ __device__ inline void topk_by_bitonic_sort_2nd(
     if (warp_id > 0) { return; }
     constexpr unsigned N = (MAX_ITOPK + 31) / 32;
     float key[N];
-    std::uint32_t val[N];
+    IdxT val[N];
     if (first) {
       /* Load itopk results */
       for (unsigned i = 0; i < N; i++) {
@@ -286,11 +289,11 @@ __device__ inline void topk_by_bitonic_sort_2nd(
           val[i] = itopk_indices[j];
         } else {
           key[i] = utils::get_max_value<float>();
-          val[i] = utils::get_max_value<std::uint32_t>();
+          val[i] = utils::get_max_value<IdxT>();
         }
       }
       /* Warp Sort */
-      bitonic::warp_sort<float, std::uint32_t, N>(key, val);
+      bitonic::warp_sort<float, IdxT, N>(key, val);
     } else {
       /* Load itopk results */
       for (unsigned i = 0; i < N; i++) {
@@ -300,7 +303,7 @@ __device__ inline void topk_by_bitonic_sort_2nd(
           val[i] = itopk_indices[device::swizzling(j)];
         } else {
           key[i] = utils::get_max_value<float>();
-          val[i] = utils::get_max_value<std::uint32_t>();
+          val[i] = utils::get_max_value<IdxT>();
         }
       }
     }
@@ -316,7 +319,7 @@ __device__ inline void topk_by_bitonic_sort_2nd(
       }
     }
     /* Warp Merge */
-    bitonic::warp_merge<float, std::uint32_t, N>(key, val, 32);
+    bitonic::warp_merge<float, IdxT, N>(key, val, 32);
     /* Store new itopk results */
     for (unsigned i = 0; i < N; i++) {
       unsigned j = (N * lane_id) + i;
@@ -330,7 +333,7 @@ __device__ inline void topk_by_bitonic_sort_2nd(
     constexpr unsigned max_itopk_per_warp = (MAX_ITOPK + 1) / 2;
     constexpr unsigned N                  = (max_itopk_per_warp + 31) / 32;
     float key[N];
-    std::uint32_t val[N];
+    IdxT val[N];
     if (first) {
       /* Load itop results (not sorted) */
       if (warp_id < 2) {
@@ -341,11 +344,11 @@ __device__ inline void topk_by_bitonic_sort_2nd(
             val[i] = itopk_indices[j];
           } else {
             key[i] = utils::get_max_value<float>();
-            val[i] = utils::get_max_value<std::uint32_t>();
+            val[i] = utils::get_max_value<IdxT>();
           }
         }
         /* Warp Sort */
-        bitonic::warp_sort<float, std::uint32_t, N>(key, val);
+        bitonic::warp_sort<float, IdxT, N>(key, val);
         /* Store intermedidate results */
         for (unsigned i = 0; i < N; i++) {
           unsigned j = (N * threadIdx.x) + i;
@@ -369,7 +372,7 @@ __device__ inline void topk_by_bitonic_sort_2nd(
           }
         }
         /* Warp Merge */
-        bitonic::warp_merge<float, std::uint32_t, N>(key, val, 32);
+        bitonic::warp_merge<float, IdxT, N>(key, val, 32);
       }
       __syncthreads();
       /* Store itopk results (sorted) */
@@ -414,8 +417,8 @@ __device__ inline void topk_by_bitonic_sort_2nd(
       if (key_0 > key_1) {
         itopk_distances[device::swizzling(j)] = key_1;
         itopk_distances[device::swizzling(k)] = key_0;
-        std::uint32_t val_0                   = itopk_indices[device::swizzling(j)];
-        std::uint32_t val_1                   = itopk_indices[device::swizzling(k)];
+        IdxT val_0                            = itopk_indices[device::swizzling(j)];
+        IdxT val_1                            = itopk_indices[device::swizzling(k)];
         itopk_indices[device::swizzling(j)]   = val_1;
         itopk_indices[device::swizzling(k)]   = val_0;
         atomicMin(work_buf + 0, j);
@@ -447,11 +450,11 @@ __device__ inline void topk_by_bitonic_sort_2nd(
           val[i] = itopk_indices[device::swizzling(k)];
         } else {
           key[i] = utils::get_max_value<float>();
-          val[i] = utils::get_max_value<std::uint32_t>();
+          val[i] = utils::get_max_value<IdxT>();
         }
       }
       /* Warp Merge */
-      bitonic::warp_merge<float, std::uint32_t, N>(key, val, 32);
+      bitonic::warp_merge<float, IdxT, N>(key, val, 32);
       /* Store new itopk results */
       for (unsigned i = 0; i < N; i++) {
         const unsigned j = (N * lane_id) + i;
@@ -468,41 +471,44 @@ __device__ inline void topk_by_bitonic_sort_2nd(
 template <unsigned MAX_ITOPK,
           unsigned MAX_CANDIDATES,
           unsigned MULTI_WARPS_1,
-          unsigned MULTI_WARPS_2>
-__device__ void topk_by_bitonic_sort(float* itopk_distances,            // [num_itopk]
-                                     std::uint32_t* itopk_indices,      // [num_itopk]
+          unsigned MULTI_WARPS_2,
+          class IdxT>
+__device__ void topk_by_bitonic_sort(float* itopk_distances,      // [num_itopk]
+                                     IdxT* itopk_indices,         // [num_itopk]
                                      const std::uint32_t num_itopk,
-                                     float* candidate_distances,        // [num_candidates]
-                                     std::uint32_t* candidate_indices,  // [num_candidates]
+                                     float* candidate_distances,  // [num_candidates]
+                                     IdxT* candidate_indices,     // [num_candidates]
                                      const std::uint32_t num_candidates,
                                      std::uint32_t* work_buf,
                                      const bool first)
 {
   // The results in candidate_distances/indices are sorted by bitonic sort.
-  topk_by_bitonic_sort_1st<MAX_CANDIDATES, MULTI_WARPS_1>(
+  topk_by_bitonic_sort_1st<MAX_CANDIDATES, MULTI_WARPS_1, IdxT>(
     candidate_distances, candidate_indices, num_candidates, num_itopk);
 
   // The results sorted above are merged with the internal intermediate top-k
   // results so far using bitonic merge.
-  topk_by_bitonic_sort_2nd<MAX_ITOPK, MULTI_WARPS_2>(itopk_distances,
-                                                     itopk_indices,
-                                                     num_itopk,
-                                                     candidate_distances,
-                                                     candidate_indices,
-                                                     num_candidates,
-                                                     work_buf,
-                                                     first);
+  topk_by_bitonic_sort_2nd<MAX_ITOPK, MULTI_WARPS_2, IdxT>(itopk_distances,
+                                                           itopk_indices,
+                                                           num_itopk,
+                                                           candidate_distances,
+                                                           candidate_indices,
+                                                           num_candidates,
+                                                           work_buf,
+                                                           first);
 }
 
 template <unsigned FIRST_TID, unsigned LAST_TID, class INDEX_T>
-__device__ inline void hashmap_restore(uint32_t* hashmap_ptr,
+__device__ inline void hashmap_restore(INDEX_T* const hashmap_ptr,
                                        const size_t hashmap_bitlen,
                                        const INDEX_T* itopk_indices,
                                        uint32_t itopk_size)
 {
+  constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
+
   if (threadIdx.x < FIRST_TID || threadIdx.x >= LAST_TID) return;
   for (unsigned i = threadIdx.x - FIRST_TID; i < itopk_size; i += LAST_TID - FIRST_TID) {
-    auto key = itopk_indices[i] & ~0x80000000;  // clear most significant bit
+    auto key = itopk_indices[i] & ~index_msb_1_mask;  // clear most significant bit
     hashmap::insert(hashmap_ptr, hashmap_bitlen, key);
   }
 }
@@ -539,9 +545,9 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
                      const std::uint32_t graph_degree,
                      const unsigned num_distilation,
                      const uint64_t rand_xor_mask,
-                     const INDEX_T* seed_ptr,                   // [num_queries, num_seeds]
+                     const INDEX_T* seed_ptr,             // [num_queries, num_seeds]
                      const uint32_t num_seeds,
-                     std::uint32_t* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
+                     INDEX_T* const visited_hashmap_ptr,  // [num_queries, 1 << hash_bitlen]
                      const std::uint32_t internal_topk,
                      const std::uint32_t num_parents,
                      const std::uint32_t min_iteration,
@@ -587,8 +593,8 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
   auto result_distances_buffer =
     reinterpret_cast<DISTANCE_T*>(result_indices_buffer + result_buffer_size_32);
   auto visited_hash_buffer =
-    reinterpret_cast<std::uint32_t*>(result_distances_buffer + result_buffer_size_32);
-  auto parent_list_buffer = reinterpret_cast<std::uint32_t*>(visited_hash_buffer + small_hash_size);
+    reinterpret_cast<INDEX_T*>(result_distances_buffer + result_buffer_size_32);
+  auto parent_list_buffer = reinterpret_cast<INDEX_T*>(visited_hash_buffer + small_hash_size);
   auto topk_ws            = reinterpret_cast<std::uint32_t*>(parent_list_buffer + num_parents);
   auto terminate_flag     = reinterpret_cast<std::uint32_t*>(topk_ws + 3);
   auto smem_working_ptr   = reinterpret_cast<std::uint32_t*>(terminate_flag + 1);
@@ -608,7 +614,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
   }
 
   // Init hashmap
-  uint32_t* local_visited_hashmap_ptr;
+  INDEX_T* local_visited_hashmap_ptr;
   if (small_hash_bitlen) {
     local_visited_hashmap_ptr = visited_hash_buffer;
   } else {
@@ -693,7 +699,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
     } else {
       _CLK_START();
       // topk with radix block sort
-      topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE>{}(
+      topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE, INDEX_T>{}(
         internal_topk,
         gridDim.x,
         result_buffer_size,
@@ -768,7 +774,11 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
     unsigned ii = i;
     if (TOPK_BY_BITONIC_SORT) { ii = device::swizzling(i); }
     if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[ii]; }
-    result_indices_ptr[j] = result_indices_buffer[ii] & ~0x80000000;  // clear most significant bit
+
+    constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask<INDEX_T>::value;
+
+    result_indices_ptr[j] =
+      result_indices_buffer[ii] & ~index_msb_1_mask;  // clear most significant bit
   }
   if (threadIdx.x == 0 && num_executed_iterations != nullptr) {
     num_executed_iterations[query_id] = iter + 1;
@@ -868,7 +878,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__
                                   const uint64_t rand_xor_mask,                   \
                                   const INDEX_T* seed_ptr,                        \
                                   const uint32_t num_seeds,                       \
-                                  std::uint32_t* const visited_hashmap_ptr,       \
+                                  INDEX_T* const visited_hashmap_ptr,             \
                                   const std::uint32_t itopk_size,                 \
                                   const std::uint32_t num_parents,                \
                                   const std::uint32_t min_iteration,              \
@@ -999,17 +1009,18 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
     const std::uint32_t topk_ws_size = 3;
     const std::uint32_t base_smem_size =
       sizeof(float) * max_dim + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 +
-      sizeof(std::uint32_t) * hashmap::get_size(small_hash_bitlen) +
-      sizeof(std::uint32_t) * num_parents + sizeof(std::uint32_t) * topk_ws_size +
-      sizeof(std::uint32_t);
+      sizeof(INDEX_T) * hashmap::get_size(small_hash_bitlen) + sizeof(INDEX_T) * num_parents +
+      sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t);
     smem_size = base_smem_size;
     if (num_itopk_candidates > 256) {
       // Tentatively calculate the required share memory size when radix
       // sort based topk is used, assuming the block size is the maximum.
       if (itopk_size <= 256) {
-        smem_size += topk_by_radix_sort<256, max_block_size>::smem_size * sizeof(std::uint32_t);
+        smem_size +=
+          topk_by_radix_sort<256, max_block_size, INDEX_T>::smem_size * sizeof(std::uint32_t);
       } else {
-        smem_size += topk_by_radix_sort<512, max_block_size>::smem_size * sizeof(std::uint32_t);
+        smem_size +=
+          topk_by_radix_sort<512, max_block_size, INDEX_T>::smem_size * sizeof(std::uint32_t);
       }
     }
 
@@ -1080,32 +1091,38 @@ struct search : search_plan_impl<DATA_T, INDEX_T, DISTANCE_T> {
         constexpr unsigned MAX_ITOPK = 256;
         if (block_size == 256) {
           constexpr unsigned BLOCK_SIZE = 256;
-          smem_size += topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE>::smem_size * sizeof(std::uint32_t);
+          smem_size +=
+            topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE, INDEX_T>::smem_size * sizeof(std::uint32_t);
         } else if (block_size == 512) {
           constexpr unsigned BLOCK_SIZE = 512;
-          smem_size += topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE>::smem_size * sizeof(std::uint32_t);
+          smem_size +=
+            topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE, INDEX_T>::smem_size * sizeof(std::uint32_t);
         } else {
           constexpr unsigned BLOCK_SIZE = 1024;
-          smem_size += topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE>::smem_size * sizeof(std::uint32_t);
+          smem_size +=
+            topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE, INDEX_T>::smem_size * sizeof(std::uint32_t);
         }
       } else {
         constexpr unsigned MAX_ITOPK = 512;
         if (block_size == 256) {
           constexpr unsigned BLOCK_SIZE = 256;
-          smem_size += topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE>::smem_size * sizeof(std::uint32_t);
+          smem_size +=
+            topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE, INDEX_T>::smem_size * sizeof(std::uint32_t);
         } else if (block_size == 512) {
           constexpr unsigned BLOCK_SIZE = 512;
-          smem_size += topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE>::smem_size * sizeof(std::uint32_t);
+          smem_size +=
+            topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE, INDEX_T>::smem_size * sizeof(std::uint32_t);
         } else {
           constexpr unsigned BLOCK_SIZE = 1024;
-          smem_size += topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE>::smem_size * sizeof(std::uint32_t);
+          smem_size +=
+            topk_by_radix_sort<MAX_ITOPK, BLOCK_SIZE, INDEX_T>::smem_size * sizeof(std::uint32_t);
         }
       }
     }
     RAFT_LOG_DEBUG("# smem_size: %u", smem_size);
     hashmap_size = 0;
     if (small_hash_bitlen == 0) {
-      hashmap_size = sizeof(uint32_t) * max_queries * hashmap::get_size(hash_bitlen);
+      hashmap_size = sizeof(INDEX_T) * max_queries * hashmap::get_size(hash_bitlen);
       hashmap.resize(hashmap_size, resource::get_cuda_stream(res));
     }
     RAFT_LOG_DEBUG("# hashmap_size: %lu", hashmap_size);
diff --git a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk.h b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk.h
index ccb65fd0ea..2896dba1f3 100644
--- a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk.h
+++ b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk.h
@@ -27,17 +27,18 @@ size_t _cuann_find_topk_bufferSize(uint32_t topK,
                                    cudaDataType_t sampleDtype = CUDA_R_32F);
 
 //
+template <class ValT>
 void _cuann_find_topk(uint32_t topK,
                       uint32_t sizeBatch,
                       uint32_t numElements,
-                      const float* inputKeys,     // [sizeBatch, ldIK,]
-                      uint32_t ldIK,              // (*) ldIK >= numElements
-                      const uint32_t* inputVals,  // [sizeBatch, ldIV,]
-                      uint32_t ldIV,              // (*) ldIV >= numElements
-                      float* outputKeys,          // [sizeBatch, ldOK,]
-                      uint32_t ldOK,              // (*) ldOK >= topK
-                      uint32_t* outputVals,       // [sizeBatch, ldOV,]
-                      uint32_t ldOV,              // (*) ldOV >= topK
+                      const float* inputKeys,  // [sizeBatch, ldIK,]
+                      uint32_t ldIK,           // (*) ldIK >= numElements
+                      const ValT* inputVals,   // [sizeBatch, ldIV,]
+                      uint32_t ldIV,           // (*) ldIV >= numElements
+                      float* outputKeys,       // [sizeBatch, ldOK,]
+                      uint32_t ldOK,           // (*) ldOK >= topK
+                      ValT* outputVals,        // [sizeBatch, ldOV,]
+                      uint32_t ldOV,           // (*) ldOV >= topK
                       void* workspace,
                       bool sort           = false,
                       uint32_t* hint      = NULL,
@@ -54,4 +55,4 @@ CUDA_DEVICE_HOST_FUNC inline size_t _cuann_aligned(size_t size, size_t unit = 12
   if (size % unit) { size += unit - (size % unit); }
   return size;
 }
-}  // namespace raft::neighbors::experimental::cagra::detail
\ No newline at end of file
+}  // namespace raft::neighbors::experimental::cagra::detail
diff --git a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh
index 072593550e..5bc4b70791 100644
--- a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh
@@ -237,7 +237,7 @@ __device__ inline void update_histogram(int itr,
     }
 #pragma unroll
     for (int v = 0; v < max(vecLen, stateBitLen); v += vecLen) {
-      int iv = i + (num_threads * v);
+      const int iv = i + (num_threads * v);
       if (iv >= nx) break;
 
       struct u32_vector x_u32_vec;
@@ -249,7 +249,7 @@ __device__ inline void update_histogram(int itr,
       }
 #pragma unroll
       for (int u = 0; u < vecLen; u++) {
-        int ivu = iv + u;
+        const int ivu = iv + u;
         if (ivu >= nx) break;
 
         uint8_t mask = (uint8_t)0x1 << (v + u);
@@ -270,7 +270,7 @@ __device__ inline void update_histogram(int itr,
             iState |= mask;
           }
         } else {
-          uint32_t k = (xi - threshold) >> shift;  // 0 <= k
+          const uint32_t k = (xi - threshold) >> shift;  // 0 <= k
           if (k >= num_bins) {
             if (stateBitLen == 8) { iState |= mask; }
           } else if (k + 1 < num_bins) {
@@ -287,15 +287,16 @@ __device__ inline void update_histogram(int itr,
 
 //
 template <int blockDim_x>
-__device__ inline void select_best_index_for_next_threshold(uint32_t topk,
-                                                            uint32_t threshold,
-                                                            uint32_t max_threshold,
-                                                            uint32_t nx_below_threshold,
-                                                            uint32_t num_bins,
-                                                            uint32_t shift,
-                                                            const uint32_t* hist,  // [num_bins]
-                                                            uint32_t* best_index,
-                                                            uint32_t* best_csum)
+__device__ inline void select_best_index_for_next_threshold(
+  const uint32_t topk,
+  const uint32_t threshold,
+  const uint32_t max_threshold,
+  const uint32_t nx_below_threshold,
+  const uint32_t num_bins,
+  const uint32_t shift,
+  const uint32_t* const hist,  // [num_bins]
+  uint32_t* const best_index,
+  uint32_t* const best_csum)
 {
   // Scan the histogram ('hist') and compute csum. Then, find the largest
   // index under the condition that the sum of the number of elements found
@@ -311,7 +312,7 @@ __device__ inline void select_best_index_for_next_threshold(uint32_t topk,
     if (threadIdx.x < num_bins) { csum = hist[threadIdx.x]; }
     BlockScanT(temp_storage).InclusiveSum(csum, csum);
     if (threadIdx.x < num_bins) {
-      uint32_t index = threadIdx.x;
+      const uint32_t index = threadIdx.x;
       if ((nx_below_threshold + csum <= topk) && (threshold + (index << shift) <= max_threshold)) {
         my_index = index;
         my_csum  = csum;
@@ -327,7 +328,7 @@ __device__ inline void select_best_index_for_next_threshold(uint32_t topk,
       BlockScanT(temp_storage).InclusiveSum(csum, csum);
       for (int i = n_data - 1; i >= 0; i--) {
         if (nx_below_threshold + csum[i] > topk) continue;
-        uint32_t index = i + (n_data * threadIdx.x);
+        const uint32_t index = i + (n_data * threadIdx.x);
         if (threshold + (index << shift) > max_threshold) continue;
         my_index = index;
         my_csum  = csum[i];
@@ -342,7 +343,7 @@ __device__ inline void select_best_index_for_next_threshold(uint32_t topk,
       BlockScanT(temp_storage).InclusiveSum(csum, csum);
       for (int i = n_data - 1; i >= 0; i--) {
         if (nx_below_threshold + csum[i] > topk) continue;
-        uint32_t index = i + (n_data * threadIdx.x);
+        const uint32_t index = i + (n_data * threadIdx.x);
         if (threshold + (index << shift) > max_threshold) continue;
         my_index = index;
         my_csum  = csum[i];
@@ -351,9 +352,9 @@ __device__ inline void select_best_index_for_next_threshold(uint32_t topk,
     }
   }
   if (threadIdx.x < num_bins) {
-    int laneid = 31 - __clz(__ballot_sync(0xffffffff, (my_index != 0xffffffff)));
+    const int laneid = 31 - __clz(__ballot_sync(0xffffffff, (my_index != 0xffffffff)));
     if ((threadIdx.x & 0x1f) == laneid) {
-      uint32_t old_index = atomicMax(best_index, my_index);
+      const uint32_t old_index = atomicMax(best_index, my_index);
       if (old_index < my_index) { atomicMax(best_csum, my_csum); }
     }
   }
@@ -362,17 +363,17 @@ __device__ inline void select_best_index_for_next_threshold(uint32_t topk,
 
 //
 template <typename T, int stateBitLen, int vecLen>
-__device__ inline void output_index_below_threshold(uint32_t topk,
-                                                    uint32_t thread_id,
-                                                    uint32_t num_threads,
-                                                    uint32_t threshold,
-                                                    uint32_t nx_below_threshold,
-                                                    const T* x,  // [nx,]
-                                                    uint32_t nx,
+__device__ inline void output_index_below_threshold(const uint32_t topk,
+                                                    const uint32_t thread_id,
+                                                    const uint32_t num_threads,
+                                                    const uint32_t threshold,
+                                                    const uint32_t nx_below_threshold,
+                                                    const T* const x,  // [nx,]
+                                                    const uint32_t nx,
                                                     const uint8_t* state,
-                                                    uint32_t* output,  // [topk]
-                                                    uint32_t* output_count,
-                                                    uint32_t* output_count_eq)
+                                                    uint32_t* const output,  // [topk]
+                                                    uint32_t* const output_count,
+                                                    uint32_t* const output_count_eq)
 {
   int ii = 0;
   for (int i = thread_id * vecLen; i < nx; i += num_threads * max(vecLen, stateBitLen), ii++) {
@@ -383,7 +384,7 @@ __device__ inline void output_index_below_threshold(uint32_t topk,
     }
 #pragma unroll
     for (int v = 0; v < max(vecLen, stateBitLen); v += vecLen) {
-      int iv = i + (num_threads * v);
+      const int iv = i + (num_threads * v);
       if (iv >= nx) break;
 
       struct u32_vector u32_vec;
@@ -395,10 +396,10 @@ __device__ inline void output_index_below_threshold(uint32_t topk,
       }
 #pragma unroll
       for (int u = 0; u < vecLen; u++) {
-        int ivu = iv + u;
+        const int ivu = iv + u;
         if (ivu >= nx) break;
 
-        uint8_t mask = (uint8_t)0x1 << (v + u);
+        const uint8_t mask = (uint8_t)0x1 << (v + u);
         if ((stateBitLen == 8) && (iState & mask)) continue;
 
         uint32_t xi;
@@ -425,9 +426,9 @@ __device__ inline void output_index_below_threshold(uint32_t topk,
 template <typename T>
 __device__ inline void swap(T& val1, T& val2)
 {
-  T val0 = val1;
-  val1   = val2;
-  val2   = val0;
+  const T val0 = val1;
+  val1         = val2;
+  val2         = val0;
 }
 
 //
@@ -493,44 +494,44 @@ __device__ __host__ inline uint32_t get_state_size(uint32_t len_x)
 }
 
 //
-template <int blockDim_x, int stateBitLen, int vecLen, int maxTopk, int numSortThreads>
+template <int blockDim_x, int stateBitLen, int vecLen, int maxTopk, int numSortThreads, class ValT>
 __device__ inline void topk_cta_11_core(uint32_t topk,
                                         uint32_t len_x,
-                                        const uint32_t* _x,        // [size_batch, ld_x,]
-                                        const uint32_t* _in_vals,  // [size_batch, ld_iv,]
-                                        uint32_t* _y,              // [size_batch, ld_y,]
-                                        uint32_t* _out_vals,       // [size_batch, ld_ov,]
-                                        uint8_t* _state,           // [size_batch, ...,]
+                                        const uint32_t* _x,    // [size_batch, ld_x,]
+                                        const ValT* _in_vals,  // [size_batch, ld_iv,]
+                                        uint32_t* _y,          // [size_batch, ld_y,]
+                                        ValT* _out_vals,       // [size_batch, ld_ov,]
+                                        uint8_t* _state,       // [size_batch, ...,]
                                         uint32_t* _hint,
                                         bool sort,
                                         uint32_t* _smem)
 {
-  uint32_t* smem_out_vals = _smem;
-  uint32_t* hist          = &(_smem[2 * maxTopk]);
-  uint32_t* best_index    = &(_smem[2 * maxTopk + 2048]);
-  uint32_t* best_csum     = &(_smem[2 * maxTopk + 2048 + 3]);
+  uint32_t* const smem_out_vals = _smem;
+  uint32_t* const hist          = &(_smem[2 * maxTopk]);
+  uint32_t* const best_index    = &(_smem[2 * maxTopk + 2048]);
+  uint32_t* const best_csum     = &(_smem[2 * maxTopk + 2048 + 3]);
 
   const uint32_t num_threads = blockDim_x;
   const uint32_t thread_id   = threadIdx.x;
   uint32_t nx                = len_x;
-  const uint32_t* x          = _x;
-  const uint32_t* in_vals    = NULL;
+  const uint32_t* const x    = _x;
+  const ValT* in_vals        = NULL;
   if (_in_vals) { in_vals = _in_vals; }
   uint32_t* y = NULL;
   if (_y) { y = _y; }
-  uint32_t* out_vals = NULL;
+  ValT* out_vals = NULL;
   if (_out_vals) { out_vals = _out_vals; }
-  uint8_t* state = _state;
-  uint32_t hint  = (_hint == NULL ? ~0u : *_hint);
+  uint8_t* state      = _state;
+  const uint32_t hint = (_hint == NULL ? ~0u : *_hint);
 
   // Initialize shared memory
   for (int i = 2 * maxTopk + thread_id; i < 2 * maxTopk + 2048 + 8; i += num_threads) {
     _smem[i] = 0;
   }
-  uint32_t* output_count      = &(_smem[2 * maxTopk + 2048 + 6]);
-  uint32_t* output_count_eq   = &(_smem[2 * maxTopk + 2048 + 7]);
-  uint32_t threshold          = 0;
-  uint32_t nx_below_threshold = 0;
+  uint32_t* const output_count    = &(_smem[2 * maxTopk + 2048 + 6]);
+  uint32_t* const output_count_eq = &(_smem[2 * maxTopk + 2048 + 7]);
+  uint32_t threshold              = 0;
+  uint32_t nx_below_threshold     = 0;
   __syncthreads();
 
   //
@@ -601,7 +602,7 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
 
   if (!sort) {
     for (int k = thread_id; k < topk; k += blockDim_x) {
-      uint32_t i = smem_out_vals[k];
+      const uint32_t i = smem_out_vals[k];
       if (y) { y[k] = x[i]; }
       if (out_vals) {
         if (in_vals) {
@@ -616,15 +617,15 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
 
   constexpr int numTopkPerThread = maxTopk / numSortThreads;
   float my_keys[numTopkPerThread];
-  uint32_t my_vals[numTopkPerThread];
+  ValT my_vals[numTopkPerThread];
 
   // Read keys and values to registers
   if (thread_id < numSortThreads) {
     for (int i = 0; i < numTopkPerThread; i++) {
-      int k = thread_id + (numSortThreads * i);
+      const int k = thread_id + (numSortThreads * i);
       if (k < topk) {
-        int j      = smem_out_vals[k];
-        my_keys[i] = ((float*)x)[j];
+        const int j = smem_out_vals[k];
+        my_keys[i]  = ((float*)x)[j];
         if (in_vals) {
           my_vals[i] = in_vals[j];
         } else {
@@ -632,7 +633,7 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
         }
       } else {
         my_keys[i] = FLT_MAX;
-        my_vals[i] = 0xffffffffU;
+        my_vals[i] = ~static_cast<ValT>(0);
       }
     }
   }
@@ -641,21 +642,21 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
 
   // Sorting by thread
   if (thread_id < numSortThreads) {
-    bool ascending = ((thread_id & mask) == 0);
+    const bool ascending = ((thread_id & mask) == 0);
     if (numTopkPerThread == 3) {
-      swap_if_needed<float, uint32_t>(my_keys[0], my_keys[1], my_vals[0], my_vals[1], ascending);
-      swap_if_needed<float, uint32_t>(my_keys[0], my_keys[2], my_vals[0], my_vals[2], ascending);
-      swap_if_needed<float, uint32_t>(my_keys[1], my_keys[2], my_vals[1], my_vals[2], ascending);
+      swap_if_needed<float, ValT>(my_keys[0], my_keys[1], my_vals[0], my_vals[1], ascending);
+      swap_if_needed<float, ValT>(my_keys[0], my_keys[2], my_vals[0], my_vals[2], ascending);
+      swap_if_needed<float, ValT>(my_keys[1], my_keys[2], my_vals[1], my_vals[2], ascending);
     } else {
       for (int j = 0; j < numTopkPerThread / 2; j += 1) {
 #pragma unroll
         for (int i = 0; i < numTopkPerThread; i += 2) {
-          swap_if_needed<float, uint32_t>(
+          swap_if_needed<float, ValT>(
             my_keys[i], my_keys[i + 1], my_vals[i], my_vals[i + 1], ascending);
         }
 #pragma unroll
         for (int i = 1; i < numTopkPerThread - 1; i += 2) {
-          swap_if_needed<float, uint32_t>(
+          swap_if_needed<float, ValT>(
             my_keys[i], my_keys[i + 1], my_vals[i], my_vals[i + 1], ascending);
         }
       }
@@ -667,11 +668,12 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
     uint32_t next_mask = mask << 1;
 
     for (uint32_t curr_mask = mask; curr_mask > 0; curr_mask >>= 1) {
-      bool ascending = ((thread_id & curr_mask) == 0) == ((thread_id & next_mask) == 0);
+      const bool ascending = ((thread_id & curr_mask) == 0) == ((thread_id & next_mask) == 0);
       if (curr_mask >= 32) {
         // inter warp
-        uint32_t* smem_vals = _smem;  // [numTopkPerThread, numSortThreads]
-        float* smem_keys    = (float*)(_smem + numTopkPerThread * numSortThreads);
+        ValT* const smem_vals = reinterpret_cast<ValT*>(_smem);  // [maxTopk]
+        float* const smem_keys =
+          reinterpret_cast<float*>(smem_vals + maxTopk);  // [numTopkPerThread, numSortThreads]
         __syncthreads();
         if (thread_id < numSortThreads) {
 #pragma unroll
@@ -684,9 +686,9 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
         if (thread_id < numSortThreads) {
 #pragma unroll
           for (int i = 0; i < numTopkPerThread; i++) {
-            float opp_key    = smem_keys[(thread_id ^ curr_mask) + (numSortThreads * i)];
-            uint32_t opp_val = smem_vals[(thread_id ^ curr_mask) + (numSortThreads * i)];
-            swap_if_needed<float, uint32_t>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
+            float opp_key = smem_keys[(thread_id ^ curr_mask) + (numSortThreads * i)];
+            ValT opp_val  = smem_vals[(thread_id ^ curr_mask) + (numSortThreads * i)];
+            swap_if_needed<float, ValT>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
           }
         }
       } else {
@@ -694,29 +696,28 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
         if (thread_id < numSortThreads) {
 #pragma unroll
           for (int i = 0; i < numTopkPerThread; i++) {
-            float opp_key    = __shfl_xor_sync(0xffffffff, my_keys[i], curr_mask);
-            uint32_t opp_val = __shfl_xor_sync(0xffffffff, my_vals[i], curr_mask);
-            swap_if_needed<float, uint32_t>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
+            float opp_key = __shfl_xor_sync(0xffffffff, my_keys[i], curr_mask);
+            ValT opp_val  = __shfl_xor_sync(0xffffffff, my_vals[i], curr_mask);
+            swap_if_needed<float, ValT>(my_keys[i], opp_key, my_vals[i], opp_val, ascending);
           }
         }
       }
     }
 
     if (thread_id < numSortThreads) {
-      bool ascending = ((thread_id & next_mask) == 0);
+      const bool ascending = ((thread_id & next_mask) == 0);
       if (numTopkPerThread == 3) {
-        swap_if_needed<float, uint32_t>(my_keys[0], my_keys[1], my_vals[0], my_vals[1], ascending);
-        swap_if_needed<float, uint32_t>(my_keys[0], my_keys[2], my_vals[0], my_vals[2], ascending);
-        swap_if_needed<float, uint32_t>(my_keys[1], my_keys[2], my_vals[1], my_vals[2], ascending);
+        swap_if_needed<float, ValT>(my_keys[0], my_keys[1], my_vals[0], my_vals[1], ascending);
+        swap_if_needed<float, ValT>(my_keys[0], my_keys[2], my_vals[0], my_vals[2], ascending);
+        swap_if_needed<float, ValT>(my_keys[1], my_keys[2], my_vals[1], my_vals[2], ascending);
       } else {
 #pragma unroll
         for (uint32_t curr_mask = numTopkPerThread / 2; curr_mask > 0; curr_mask >>= 1) {
 #pragma unroll
           for (int i = 0; i < numTopkPerThread; i++) {
-            int j = i ^ curr_mask;
+            const int j = i ^ curr_mask;
             if (i > j) continue;
-            swap_if_needed<float, uint32_t>(
-              my_keys[i], my_keys[j], my_vals[i], my_vals[j], ascending);
+            swap_if_needed<float, ValT>(my_keys[i], my_keys[j], my_vals[i], my_vals[j], ascending);
           }
         }
       }
@@ -727,9 +728,9 @@ __device__ inline void topk_cta_11_core(uint32_t topk,
   // Write sorted keys and values
   if (thread_id < numSortThreads) {
     for (int i = 0; i < numTopkPerThread; i++) {
-      int k = i + (numTopkPerThread * thread_id);
+      const int k = i + (numTopkPerThread * thread_id);
       if (k < topk) {
-        if (y) { y[k] = ((uint32_t*)my_keys)[i]; }
+        if (y) { y[k] = reinterpret_cast<uint32_t*>(my_keys)[i]; }
         if (out_vals) { out_vals[k] = my_vals[i]; }
       }
     }
@@ -755,28 +756,32 @@ int _get_vecLen(uint32_t maxSamples, int maxVecLen = MAX_VEC_LENGTH)
 }
 }  // unnamed namespace
 
-template <int blockDim_x, int stateBitLen, int vecLen, int maxTopk, int numSortThreads>
+template <int blockDim_x, int stateBitLen, int vecLen, int maxTopk, int numSortThreads, class ValT>
 __launch_bounds__(1024, 1) __global__
   void kern_topk_cta_11(uint32_t topk,
                         uint32_t size_batch,
                         uint32_t len_x,
-                        const uint32_t* _x,        // [size_batch, ld_x,]
+                        const uint32_t* _x,    // [size_batch, ld_x,]
                         uint32_t ld_x,
-                        const uint32_t* _in_vals,  // [size_batch, ld_iv,]
+                        const ValT* _in_vals,  // [size_batch, ld_iv,]
                         uint32_t ld_iv,
-                        uint32_t* _y,              // [size_batch, ld_y,]
+                        uint32_t* _y,          // [size_batch, ld_y,]
                         uint32_t ld_y,
-                        uint32_t* _out_vals,       // [size_batch, ld_ov,]
+                        ValT* _out_vals,       // [size_batch, ld_ov,]
                         uint32_t ld_ov,
-                        uint8_t* _state,           // [size_batch, ...,]
-                        uint32_t* _hints,          // [size_batch,]
+                        uint8_t* _state,       // [size_batch, ...,]
+                        uint32_t* _hints,      // [size_batch,]
                         bool sort)
 {
-  uint32_t i_batch = blockIdx.x;
+  const uint32_t i_batch = blockIdx.x;
   if (i_batch >= size_batch) return;
-  __shared__ uint32_t _smem[2 * maxTopk + 2048 + 8];
 
-  topk_cta_11_core<blockDim_x, stateBitLen, vecLen, maxTopk, numSortThreads>(
+  constexpr uint32_t smem_len = 2 * maxTopk + 2048 + 8;
+  static_assert(maxTopk * (1 + utils::size_of<ValT>() / utils::size_of<uint32_t>()) <= smem_len,
+                "maxTopk * sizeof(ValT) must be smaller or equal to 8192 byte");
+  __shared__ uint32_t _smem[smem_len];
+
+  topk_cta_11_core<blockDim_x, stateBitLen, vecLen, maxTopk, numSortThreads, ValT>(
     topk,
     len_x,
     (_x == NULL ? NULL : _x + i_batch * ld_x),
@@ -809,17 +814,18 @@ size_t inline _cuann_find_topk_bufferSize(uint32_t topK,
   return workspaceSize;
 }
 
+template <class ValT>
 inline void _cuann_find_topk(uint32_t topK,
                              uint32_t sizeBatch,
                              uint32_t numElements,
-                             const float* inputKeys,     // [sizeBatch, ldIK,]
-                             uint32_t ldIK,              // (*) ldIK >= numElements
-                             const uint32_t* inputVals,  // [sizeBatch, ldIV,]
-                             uint32_t ldIV,              // (*) ldIV >= numElements
-                             float* outputKeys,          // [sizeBatch, ldOK,]
-                             uint32_t ldOK,              // (*) ldOK >= topK
-                             uint32_t* outputVals,       // [sizeBatch, ldOV,]
-                             uint32_t ldOV,              // (*) ldOV >= topK
+                             const float* inputKeys,  // [sizeBatch, ldIK,]
+                             uint32_t ldIK,           // (*) ldIK >= numElements
+                             const ValT* inputVals,   // [sizeBatch, ldIV,]
+                             uint32_t ldIV,           // (*) ldIV >= numElements
+                             float* outputKeys,       // [sizeBatch, ldOK,]
+                             uint32_t ldOK,           // (*) ldOK >= topK
+                             ValT* outputVals,        // [sizeBatch, ldOV,]
+                             uint32_t ldOV,           // (*) ldOV >= topK
                              void* workspace,
                              bool sort,
                              uint32_t* hints,
@@ -845,48 +851,48 @@ inline void _cuann_find_topk(uint32_t topK,
                      uint32_t,
                      const uint32_t*,
                      uint32_t,
-                     const uint32_t*,
+                     const ValT*,
                      uint32_t,
                      uint32_t*,
                      uint32_t,
-                     uint32_t*,
+                     ValT*,
                      uint32_t,
                      uint8_t*,
                      uint32_t*,
                      bool) = nullptr;
 
   // V:vecLen, K:maxTopk, T:numSortThreads
-#define SET_KERNEL_VKT(V, K, T)                                      \
-  do {                                                               \
-    assert(numThreads >= T);                                         \
-    assert((K % T) == 0);                                            \
-    assert((K / T) <= 4);                                            \
-    cta_kernel = kern_topk_cta_11<numThreads, stateBitLen, V, K, T>; \
+#define SET_KERNEL_VKT(V, K, T, ValT)                                      \
+  do {                                                                     \
+    assert(numThreads >= T);                                               \
+    assert((K % T) == 0);                                                  \
+    assert((K / T) <= 4);                                                  \
+    cta_kernel = kern_topk_cta_11<numThreads, stateBitLen, V, K, T, ValT>; \
   } while (0)
 
   // V: vecLen
-#define SET_KERNEL_V(V)                                                                      \
+#define SET_KERNEL_V(V, ValT)                                                                \
   do {                                                                                       \
     if (topK <= 32) {                                                                        \
-      SET_KERNEL_VKT(V, 32, 32);                                                             \
+      SET_KERNEL_VKT(V, 32, 32, ValT);                                                       \
     } else if (topK <= 64) {                                                                 \
-      SET_KERNEL_VKT(V, 64, 32);                                                             \
+      SET_KERNEL_VKT(V, 64, 32, ValT);                                                       \
     } else if (topK <= 96) {                                                                 \
-      SET_KERNEL_VKT(V, 96, 32);                                                             \
+      SET_KERNEL_VKT(V, 96, 32, ValT);                                                       \
     } else if (topK <= 128) {                                                                \
-      SET_KERNEL_VKT(V, 128, 32);                                                            \
+      SET_KERNEL_VKT(V, 128, 32, ValT);                                                      \
     } else if (topK <= 192) {                                                                \
-      SET_KERNEL_VKT(V, 192, 64);                                                            \
+      SET_KERNEL_VKT(V, 192, 64, ValT);                                                      \
     } else if (topK <= 256) {                                                                \
-      SET_KERNEL_VKT(V, 256, 64);                                                            \
+      SET_KERNEL_VKT(V, 256, 64, ValT);                                                      \
     } else if (topK <= 384) {                                                                \
-      SET_KERNEL_VKT(V, 384, 128);                                                           \
+      SET_KERNEL_VKT(V, 384, 128, ValT);                                                     \
     } else if (topK <= 512) {                                                                \
-      SET_KERNEL_VKT(V, 512, 128);                                                           \
+      SET_KERNEL_VKT(V, 512, 128, ValT);                                                     \
     } else if (topK <= 768) {                                                                \
-      SET_KERNEL_VKT(V, 768, 256);                                                           \
+      SET_KERNEL_VKT(V, 768, 256, ValT);                                                     \
     } else if (topK <= 1024) {                                                               \
-      SET_KERNEL_VKT(V, 1024, 256);                                                          \
+      SET_KERNEL_VKT(V, 1024, 256, ValT);                                                    \
     } \
         /* else if (topK <= 1536) { SET_KERNEL_VKT(V, 1536, 512); } */ \
         /* else if (topK <= 2048) { SET_KERNEL_VKT(V, 2048, 512); } */ \
@@ -901,9 +907,9 @@ inline void _cuann_find_topk(uint32_t topK,
 
   int _vecLen = _get_vecLen(ldIK, 2);
   if (_vecLen == 2) {
-    SET_KERNEL_V(2);
+    SET_KERNEL_V(2, ValT);
   } else if (_vecLen == 1) {
-    SET_KERNEL_V(1);
+    SET_KERNEL_V(1, ValT);
   }
 
   cta_kernel<<<blocks, threads, 0, stream>>>(topK,
@@ -923,4 +929,4 @@ inline void _cuann_find_topk(uint32_t topK,
 
   return;
 }
-}  // namespace raft::neighbors::experimental::cagra::detail
\ No newline at end of file
+}  // namespace raft::neighbors::experimental::cagra::detail
diff --git a/cpp/include/raft/neighbors/detail/cagra/utils.hpp b/cpp/include/raft/neighbors/detail/cagra/utils.hpp
index 3e329c9239..934e84d4d5 100644
--- a/cpp/include/raft/neighbors/detail/cagra/utils.hpp
+++ b/cpp/include/raft/neighbors/detail/cagra/utils.hpp
@@ -128,6 +128,11 @@ _RAFT_HOST_DEVICE inline std::uint32_t get_max_value<std::uint32_t>()
 {
   return 0xffffffffu;
 };
+template <>
+_RAFT_HOST_DEVICE inline std::uint64_t get_max_value<std::uint64_t>()
+{
+  return 0xfffffffffffffffflu;
+};
 
 template <int A, int B, class = void>
 struct constexpr_max {
@@ -138,6 +143,11 @@ template <int A, int B>
 struct constexpr_max<A, B, std::enable_if_t<(B > A), bool>> {
   static const int value = B;
 };
+
+template <class IdxT>
+struct gen_index_msb_1_mask {
+  static constexpr IdxT value = static_cast<IdxT>(1) << (utils::size_of<IdxT>() * 8 - 1);
+};
 }  // namespace utils
 
 }  // namespace raft::neighbors::experimental::cagra::detail
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 98ce8ac5bd..1b4d269d1b 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -316,6 +316,7 @@ if(BUILD_TESTS)
     test/neighbors/ann_cagra/test_float_uint32_t.cu
     test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
     test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
+    test/neighbors/ann_cagra/test_float_int64_t.cu
     test/neighbors/ann_ivf_flat/test_float_int64_t.cu
     test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu
     test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu
diff --git a/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu
new file mode 100644
index 0000000000..e473a72b2b
--- /dev/null
+++ b/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#undef RAFT_EXPLICIT_INSTANTIATE_ONLY
+#include "../ann_cagra.cuh"
+
+namespace raft::neighbors::experimental::cagra {
+
+typedef AnnCagraTest<float, float, std::int64_t> AnnCagraTestF_I64;
+TEST_P(AnnCagraTestF_I64, AnnCagra) { this->testCagra(); }
+
+INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_I64, ::testing::ValuesIn(inputs));
+
+}  // namespace raft::neighbors::experimental::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
index adb44a9264..dbaf4dedd9 100644
--- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu
@@ -20,13 +20,13 @@
 
 namespace raft::neighbors::experimental::cagra {
 
-typedef AnnCagraTest<float, float, std::uint32_t> AnnCagraTestF;
-TEST_P(AnnCagraTestF, AnnCagra) { this->testCagra(); }
+typedef AnnCagraTest<float, float, std::uint32_t> AnnCagraTestF_U32;
+TEST_P(AnnCagraTestF_U32, AnnCagra) { this->testCagra(); }
 
-typedef AnnCagraSortTest<float, float, std::uint32_t> AnnCagraSortTestF;
-TEST_P(AnnCagraSortTestF, AnnCagraSort) { this->testCagraSort(); }
+typedef AnnCagraSortTest<float, float, std::uint32_t> AnnCagraSortTestF_U32;
+TEST_P(AnnCagraSortTestF_U32, AnnCagraSort) { this->testCagraSort(); }
 
-INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF, ::testing::ValuesIn(inputs));
-INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF_U32, ::testing::ValuesIn(inputs));
 
 }  // namespace raft::neighbors::experimental::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
index 11c986c189..ba60131677 100644
--- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu
@@ -20,12 +20,12 @@
 
 namespace raft::neighbors::experimental::cagra {
 
-typedef AnnCagraTest<float, std::int8_t, std::uint32_t> AnnCagraTestI8;
-TEST_P(AnnCagraTestI8, AnnCagra) { this->testCagra(); }
-typedef AnnCagraSortTest<float, std::int8_t, std::uint32_t> AnnCagraSortTestI8;
-TEST_P(AnnCagraSortTestI8, AnnCagraSort) { this->testCagraSort(); }
+typedef AnnCagraTest<float, std::int8_t, std::uint32_t> AnnCagraTestI8_U32;
+TEST_P(AnnCagraTestI8_U32, AnnCagra) { this->testCagra(); }
+typedef AnnCagraSortTest<float, std::int8_t, std::uint32_t> AnnCagraSortTestI8_U32;
+TEST_P(AnnCagraSortTestI8_U32, AnnCagraSort) { this->testCagraSort(); }
 
-INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8, ::testing::ValuesIn(inputs));
-INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8_U32, ::testing::ValuesIn(inputs));
 
 }  // namespace raft::neighbors::experimental::cagra
diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
index 51d4feeed2..cc172e4833 100644
--- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
+++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu
@@ -20,13 +20,13 @@
 
 namespace raft::neighbors::experimental::cagra {
 
-typedef AnnCagraTest<float, std::uint8_t, std::uint32_t> AnnCagraTestU8;
-TEST_P(AnnCagraTestU8, AnnCagra) { this->testCagra(); }
+typedef AnnCagraTest<float, std::uint8_t, std::uint32_t> AnnCagraTestU8_U32;
+TEST_P(AnnCagraTestU8_U32, AnnCagra) { this->testCagra(); }
 
-typedef AnnCagraSortTest<float, std::uint8_t, std::uint32_t> AnnCagraSortTestU8;
-TEST_P(AnnCagraSortTestU8, AnnCagraSort) { this->testCagraSort(); }
+typedef AnnCagraSortTest<float, std::uint8_t, std::uint32_t> AnnCagraSortTestU8_U32;
+TEST_P(AnnCagraSortTestU8_U32, AnnCagraSort) { this->testCagraSort(); }
 
-INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8, ::testing::ValuesIn(inputs));
-INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8_U32, ::testing::ValuesIn(inputs));
 
 }  // namespace raft::neighbors::experimental::cagra

From cdf107bbd415e756f87a3eee605bc35cdf81e113 Mon Sep 17 00:00:00 2001
From: Alexander Guzhva <alexanderguzhva@gmail.com>
Date: Fri, 19 May 2023 22:37:48 +0000
Subject: [PATCH 70/78] Introduce sample filtering to IVFPQ index search
 (#1513)

A prototype that introduces per-sample filtering for IVFPQ search. Please feel free to use it as a foundation for future changes, if appropriate: the code is functional, but is not super clean-and-neat.

The diff introduces a template parameter called `SampleFilterT`. An instance is expected
* to be `SampleFilterT()` constructible (which was mostly needed to define a default behavior in the form of `SampleFilterT sample_filter=SampleFilterT()`, see below)
* to provide an `inline __device__ bool operator()(...)` that returns `true` if a given sample is valid for being used against a given query in IVFPQ search

The default filter (that I set as a default one in certain facilities in the form of `typename SampleFilterT = NoneSampleFilter` in order not to modify way too many files) allows all samples to be used:
```
struct NoneSampleFilter {
  inline __device__ __host__ bool operator()(
    // query index
    const uint32_t query_ix,
    // the current inverted list index
    const uint32_t cluster_ix,
    // the index of the current sample inside the current inverted list
    const uint32_t sample_ix
  ) const {
    return true;
  }
};
```
Here `__host__` is needed for a CPU-based testing only.
Also, I've provided an implementation of `BitMaskSampleFilter` that allows filtering samples based on a bit mask, as an example. The implementation was tested in a semi-production environment.

All the filter-related code was added to `cpp/include/raft/neighbors/detail/sample_filter.cuh`.

The default `ivf_pq_search()` method remains unchanged, but one more method, `ivf_pq_search_with_filtering()`, with an additional template argument `SampleFilterT` and one more input parameter, was introduced.
```
template <typename T, typename IdxT, typename SampleFilterT>
void search_with_filtering(raft::device_resources const& handle,
            const raft::neighbors::ivf_pq::search_params& params,
            const index<IdxT>& idx,
            const T* queries,
            uint32_t n_queries,
            uint32_t k,
            IdxT* neighbors,
            float* distances,
            rmm::mr::device_memory_resource* mr = nullptr,
            SampleFilterT sample_filter = SampleFilterT());
```

All the current instantiations use `NoneSampleFilter` only.

I've used `SampleFilterT sample_filter` parameter passing instead of `const SampleFilterT sample_filter` in the function calls in order to be able to add some debugging facilities to a filter, with the hope that the compiler is smart enough to understand the de-facto constness if needed.

The filter does not take a computed distance score into account by design, thus the current implementation cannot have a distance threshold. This can be easily changed, if appropriate.

It is still questionable to me whether this filtering needs to be injected right inside the search kernel instead of doing post-processing; please let me know if you have any thoughts on the topic.

I'm happy to address the comments.

Thanks.

Authors:
  - Alexander Guzhva (https://github.com/alexanderguzhva)
  - Artem M. Chirkin (https://github.com/achirkin)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Artem M. Chirkin (https://github.com/achirkin)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1513
---
 .../detail/ivf_pq_compute_similarity-ext.cuh  | 124 +++++++++++-------
 .../detail/ivf_pq_compute_similarity-inl.cuh  |  92 +++++++++----
 .../raft/neighbors/detail/ivf_pq_search.cuh   |  42 +++---
 .../raft/neighbors/detail/sample_filter.cuh   | 116 ++++++++++++++++
 cpp/include/raft/neighbors/ivf_pq-ext.cuh     |  21 +++
 cpp/include/raft/neighbors/ivf_pq-inl.cuh     |  92 ++++++++++---
 .../ivf_pq_compute_similarity_00_generate.py  |  14 +-
 .../ivf_pq_compute_similarity_float_float.cu  |  80 +++++------
 ...f_pq_compute_similarity_float_fp8_false.cu |  81 ++++++------
 ...vf_pq_compute_similarity_float_fp8_true.cu |  81 ++++++------
 .../ivf_pq_compute_similarity_float_half.cu   |  80 +++++------
 ...vf_pq_compute_similarity_half_fp8_false.cu |  81 ++++++------
 ...ivf_pq_compute_similarity_half_fp8_true.cu |  81 ++++++------
 .../ivf_pq_compute_similarity_half_half.cu    |  80 +++++------
 14 files changed, 687 insertions(+), 378 deletions(-)
 create mode 100644 cpp/include/raft/neighbors/detail/sample_filter.cuh

diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh
index 41e9fda701..62e46e3ae1 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh
@@ -20,6 +20,7 @@
 #include <raft/core/detail/macros.hpp>               // RAFT_WEAK_FUNCTION
 #include <raft/distance/distance_types.hpp>          // raft::distance::DistanceType
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>  // raft::neighbors::ivf_pq::detail::fp_8bit
+#include <raft/neighbors/detail/sample_filter.cuh>   // NoneSampleFilter
 #include <raft/neighbors/ivf_pq_types.hpp>           // raft::neighbors::ivf_pq::codebook_gen
 #include <raft/util/raft_explicit.hpp>               // RAFT_EXPLICIT
 #include <rmm/cuda_stream_view.hpp>                  // rmm::cuda_stream_view
@@ -36,6 +37,7 @@ auto RAFT_WEAK_FUNCTION is_local_topk_feasible(uint32_t k, uint32_t n_probes, ui
 
 template <typename OutT,
           typename LutT,
+          typename SampleFilterT,
           uint32_t PqBits,
           int Capacity,
           bool PrecompBaseDiff,
@@ -45,6 +47,7 @@ __global__ void compute_similarity_kernel(uint32_t n_rows,
                                           uint32_t n_probes,
                                           uint32_t pq_dim,
                                           uint32_t n_queries,
+                                          uint32_t queries_offset,
                                           distance::DistanceType metric,
                                           codebook_gen codebook_kind,
                                           uint32_t topk,
@@ -57,32 +60,34 @@ __global__ void compute_similarity_kernel(uint32_t n_rows,
                                           const float* queries,
                                           const uint32_t* index_list,
                                           float* query_kths,
+                                          SampleFilterT sample_filter,
                                           LutT* lut_scores,
                                           OutT* _out_scores,
                                           uint32_t* _out_indices) RAFT_EXPLICIT;
 
 // The signature of the kernel defined by a minimal set of template parameters
-template <typename OutT, typename LutT>
+template <typename OutT, typename LutT, typename SampleFilterT>
 using compute_similarity_kernel_t =
-  decltype(&compute_similarity_kernel<OutT, LutT, 8, 0, true, true>);
+  decltype(&compute_similarity_kernel<OutT, LutT, SampleFilterT, 8, 0, true, true>);
 
-template <typename OutT, typename LutT>
+template <typename OutT, typename LutT, typename SampleFilterT>
 struct selected {
-  compute_similarity_kernel_t<OutT, LutT> kernel;
+  compute_similarity_kernel_t<OutT, LutT, SampleFilterT> kernel;
   dim3 grid_dim;
   dim3 block_dim;
   size_t smem_size;
   size_t device_lut_size;
 };
 
-template <typename OutT, typename LutT>
-void compute_similarity_run(selected<OutT, LutT> s,
+template <typename OutT, typename LutT, typename SampleFilterT>
+void compute_similarity_run(selected<OutT, LutT, SampleFilterT> s,
                             rmm::cuda_stream_view stream,
                             uint32_t n_rows,
                             uint32_t dim,
                             uint32_t n_probes,
                             uint32_t pq_dim,
                             uint32_t n_queries,
+                            uint32_t queries_offset,
                             distance::DistanceType metric,
                             codebook_gen codebook_kind,
                             uint32_t topk,
@@ -95,6 +100,7 @@ void compute_similarity_run(selected<OutT, LutT> s,
                             const float* queries,
                             const uint32_t* index_list,
                             float* query_kths,
+                            SampleFilterT sample_filter,
                             LutT* lut_scores,
                             OutT* _out_scores,
                             uint32_t* _out_indices) RAFT_EXPLICIT;
@@ -113,7 +119,7 @@ void compute_similarity_run(selected<OutT, LutT> s,
  *    beyond this limit do not consider increasing the number of active blocks per SM
  *    would improve locality anymore.
  */
-template <typename OutT, typename LutT>
+template <typename OutT, typename LutT, typename SampleFilterT>
 auto compute_similarity_select(const cudaDeviceProp& dev_props,
                                bool manage_local_topk,
                                int locality_hint,
@@ -123,62 +129,78 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props,
                                uint32_t precomp_data_count,
                                uint32_t n_queries,
                                uint32_t n_probes,
-                               uint32_t topk) -> selected<OutT, LutT> RAFT_EXPLICIT;
+                               uint32_t topk) -> selected<OutT, LutT, SampleFilterT> RAFT_EXPLICIT;
 
 }  // namespace raft::neighbors::ivf_pq::detail
 
 #endif  // RAFT_EXPLICIT_INSTANTIATE_ONLY
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)         \
-  extern template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                           \
-    bool manage_local_topk,                                                                    \
-    int locality_hint,                                                                         \
-    double preferred_shmem_carveout,                                                           \
-    uint32_t pq_bits,                                                                          \
-    uint32_t pq_dim,                                                                           \
-    uint32_t precomp_data_count,                                                               \
-    uint32_t n_queries,                                                                        \
-    uint32_t n_probes,                                                                         \
-    uint32_t topk)                                                                             \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                                   \
-                                                                                               \
-  extern template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                                   \
-    rmm::cuda_stream_view stream,                                                              \
-    uint32_t n_rows,                                                                           \
-    uint32_t dim,                                                                              \
-    uint32_t n_probes,                                                                         \
-    uint32_t pq_dim,                                                                           \
-    uint32_t n_queries,                                                                        \
-    raft::distance::DistanceType metric,                                                       \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                       \
-    uint32_t topk,                                                                             \
-    uint32_t max_samples,                                                                      \
-    const float* cluster_centers,                                                              \
-    const float* pq_centers,                                                                   \
-    const uint8_t* const* pq_dataset,                                                          \
-    const uint32_t* cluster_labels,                                                            \
-    const uint32_t* _chunk_indices,                                                            \
-    const float* queries,                                                                      \
-    const uint32_t* index_list,                                                                \
-    float* query_kths,                                                                         \
-    LutT* lut_scores,                                                                          \
-    OutT* _out_scores,                                                                         \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  extern template auto                                                                   \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  extern template void                                                                   \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+  half,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
-instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(half, half);
-instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, half);
-instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, float);
+  half,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+  half, half, raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
+  float, half, raft::neighbors::ivf_pq::detail::NoneSampleFilter);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float, float, raft::neighbors::ivf_pq::detail::NoneSampleFilter);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh
index bc899c7ca7..37174f54e1 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh
@@ -19,6 +19,7 @@
 #include <raft/distance/distance_types.hpp>  // raft::distance::DistanceType
 #include <raft/matrix/detail/select_warpsort.cuh>  // matrix::detail::select::warpsort::warp_sort_distributed
 #include <raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh>  // dummy_block_sort_t
+#include <raft/neighbors/detail/sample_filter.cuh>            // NoneSampleFilter
 #include <raft/neighbors/ivf_pq_types.hpp>                    // codebook_gen
 #include <raft/util/cuda_rt_essentials.hpp>                   // RAFT_CUDA_TRY
 #include <raft/util/device_atomics.cuh>                       // raft::atomicMin
@@ -200,6 +201,9 @@ __device__ auto ivfpq_compute_score(uint32_t pq_dim,
  * @param pq_dim
  *   The dimensionality of an encoded vector after compression by PQ.
  * @param n_queries the number of queries.
+ * @param queries_offset
+ *   An offset of the current query batch. It is used for feeding sample_filter with the
+ *   correct query index.
  * @param metric the distance type.
  * @param codebook_kind Defines the way PQ codebooks have been trained.
  * @param topk the `k` in the select top-k.
@@ -221,6 +225,12 @@ __device__ auto ivfpq_compute_score(uint32_t pq_dim,
  * @param index_list
  *   An optional device pointer to the enforced order of search [n_queries, n_probes].
  *   One can pass reordered indices here to try to improve data reading locality.
+ * @param query_kths
+ *   query_kths keep the current state of the filtering - atomically updated distances to the
+ *   k-th closest neighbors for each query [n_queries].
+ * @param sample_filter
+ *   A filter that selects samples for a given query. Use an instance of NoneSampleFilter to
+ *   provide a green light for every sample.
  * @param lut_scores
  *   The device pointer for storing the lookup table globally [gridDim.x, pq_dim << PqBits].
  *   Ignored when `EnableSMemLut == true`.
@@ -236,6 +246,7 @@ __device__ auto ivfpq_compute_score(uint32_t pq_dim,
  */
 template <typename OutT,
           typename LutT,
+          typename SampleFilterT,
           uint32_t PqBits,
           int Capacity,
           bool PrecompBaseDiff,
@@ -245,6 +256,7 @@ __global__ void compute_similarity_kernel(uint32_t n_rows,
                                           uint32_t n_probes,
                                           uint32_t pq_dim,
                                           uint32_t n_queries,
+                                          uint32_t queries_offset,
                                           distance::DistanceType metric,
                                           codebook_gen codebook_kind,
                                           uint32_t topk,
@@ -257,6 +269,7 @@ __global__ void compute_similarity_kernel(uint32_t n_rows,
                                           const float* queries,
                                           const uint32_t* index_list,
                                           float* query_kths,
+                                          SampleFilterT sample_filter,
                                           LutT* lut_scores,
                                           OutT* _out_scores,
                                           uint32_t* _out_indices)
@@ -447,7 +460,8 @@ __global__ void compute_similarity_kernel(uint32_t n_rows,
          i += blockDim.x, pq_thread_data += pq_line_width) {
       OutT score = kDummy;
       bool valid = i < n_samples;
-      if (valid) {
+      // Check bounds and that the sample is acceptable for the query
+      if (valid && sample_filter(queries_offset + query_ix, label, i)) {
         score = ivfpq_compute_score<OutT, LutT, vec_t, PqBits>(
           pq_dim,
           reinterpret_cast<const vec_t::io_t*>(pq_thread_data),
@@ -479,22 +493,27 @@ __global__ void compute_similarity_kernel(uint32_t n_rows,
 }
 
 // The signature of the kernel defined by a minimal set of template parameters
-template <typename OutT, typename LutT>
+template <typename OutT, typename LutT, typename SampleFilterT = NoneSampleFilter>
 using compute_similarity_kernel_t =
-  decltype(&compute_similarity_kernel<OutT, LutT, 8, 0, true, true>);
+  decltype(&compute_similarity_kernel<OutT, LutT, SampleFilterT, 8, 0, true, true>);
 
 // The config struct lifts the runtime parameters to the template parameters
-template <typename OutT, typename LutT, bool PrecompBaseDiff, bool EnableSMemLut>
+template <typename OutT,
+          typename LutT,
+          bool PrecompBaseDiff,
+          bool EnableSMemLut,
+          typename SampleFilterT = NoneSampleFilter>
 struct compute_similarity_kernel_config {
  public:
-  static auto get(uint32_t pq_bits, uint32_t k_max) -> compute_similarity_kernel_t<OutT, LutT>
+  static auto get(uint32_t pq_bits, uint32_t k_max)
+    -> compute_similarity_kernel_t<OutT, LutT, SampleFilterT>
   {
     return kernel_choose_bits(pq_bits, k_max);
   }
 
  private:
   static auto kernel_choose_bits(uint32_t pq_bits, uint32_t k_max)
-    -> compute_similarity_kernel_t<OutT, LutT>
+    -> compute_similarity_kernel_t<OutT, LutT, SampleFilterT>
   {
     switch (pq_bits) {
       case 4: return kernel_try_capacity<4, kMaxCapacity>(k_max);
@@ -507,7 +526,8 @@ struct compute_similarity_kernel_config {
   }
 
   template <uint32_t PqBits, int Capacity>
-  static auto kernel_try_capacity(uint32_t k_max) -> compute_similarity_kernel_t<OutT, LutT>
+  static auto kernel_try_capacity(uint32_t k_max)
+    -> compute_similarity_kernel_t<OutT, LutT, SampleFilterT>
   {
     if constexpr (Capacity > 0) {
       if (k_max == 0 || k_max > Capacity) { return kernel_try_capacity<PqBits, 0>(k_max); }
@@ -515,23 +535,36 @@ struct compute_similarity_kernel_config {
     if constexpr (Capacity > 1) {
       if (k_max * 2 <= Capacity) { return kernel_try_capacity<PqBits, (Capacity / 2)>(k_max); }
     }
-    return compute_similarity_kernel<OutT, LutT, PqBits, Capacity, PrecompBaseDiff, EnableSMemLut>;
+    return compute_similarity_kernel<OutT,
+                                     LutT,
+                                     SampleFilterT,
+                                     PqBits,
+                                     Capacity,
+                                     PrecompBaseDiff,
+                                     EnableSMemLut>;
   }
 };
 
 // A standalone accessor function was necessary to make sure template
 // instantiation work correctly. This accessor function is not used anymore and
 // may be removed.
-template <typename OutT, typename LutT, bool PrecompBaseDiff, bool EnableSMemLut>
+template <typename OutT,
+          typename LutT,
+          bool PrecompBaseDiff,
+          bool EnableSMemLut,
+          typename SampleFilterT = NoneSampleFilter>
 auto get_compute_similarity_kernel(uint32_t pq_bits, uint32_t k_max)
-  -> compute_similarity_kernel_t<OutT, LutT>
+  -> compute_similarity_kernel_t<OutT, LutT, SampleFilterT>
 {
-  return compute_similarity_kernel_config<OutT, LutT, PrecompBaseDiff, EnableSMemLut>::get(pq_bits,
-                                                                                           k_max);
+  return compute_similarity_kernel_config<OutT,
+                                          LutT,
+                                          PrecompBaseDiff,
+                                          EnableSMemLut,
+                                          SampleFilterT>::get(pq_bits, k_max);
 }
 
 /** Estimate the occupancy for the given kernel on the given device. */
-template <typename OutT, typename LutT>
+template <typename OutT, typename LutT, typename SampleFilterT>
 struct occupancy_t {
   using shmem_unit = Pow2<128>;
 
@@ -542,7 +575,7 @@ struct occupancy_t {
   inline occupancy_t() = default;
   inline occupancy_t(size_t smem,
                      uint32_t n_threads,
-                     compute_similarity_kernel_t<OutT, LutT> kernel,
+                     compute_similarity_kernel_t<OutT, LutT, SampleFilterT> kernel,
                      const cudaDeviceProp& dev_props)
   {
     RAFT_CUDA_TRY(
@@ -553,23 +586,24 @@ struct occupancy_t {
   }
 };
 
-template <typename OutT, typename LutT>
+template <typename OutT, typename LutT, typename SampleFilterT>
 struct selected {
-  compute_similarity_kernel_t<OutT, LutT> kernel;
+  compute_similarity_kernel_t<OutT, LutT, SampleFilterT> kernel;
   dim3 grid_dim;
   dim3 block_dim;
   size_t smem_size;
   size_t device_lut_size;
 };
 
-template <typename OutT, typename LutT>
-void compute_similarity_run(selected<OutT, LutT> s,
+template <typename OutT, typename LutT, typename SampleFilterT = NoneSampleFilter>
+void compute_similarity_run(selected<OutT, LutT, SampleFilterT> s,
                             rmm::cuda_stream_view stream,
                             uint32_t n_rows,
                             uint32_t dim,
                             uint32_t n_probes,
                             uint32_t pq_dim,
                             uint32_t n_queries,
+                            uint32_t queries_offset,
                             distance::DistanceType metric,
                             codebook_gen codebook_kind,
                             uint32_t topk,
@@ -582,6 +616,7 @@ void compute_similarity_run(selected<OutT, LutT> s,
                             const float* queries,
                             const uint32_t* index_list,
                             float* query_kths,
+                            SampleFilterT sample_filter,
                             LutT* lut_scores,
                             OutT* _out_scores,
                             uint32_t* _out_indices)
@@ -591,6 +626,7 @@ void compute_similarity_run(selected<OutT, LutT> s,
                                                              n_probes,
                                                              pq_dim,
                                                              n_queries,
+                                                             queries_offset,
                                                              metric,
                                                              codebook_kind,
                                                              topk,
@@ -603,6 +639,7 @@ void compute_similarity_run(selected<OutT, LutT> s,
                                                              queries,
                                                              index_list,
                                                              query_kths,
+                                                             sample_filter,
                                                              lut_scores,
                                                              _out_scores,
                                                              _out_indices);
@@ -623,7 +660,7 @@ void compute_similarity_run(selected<OutT, LutT> s,
  *    beyond this limit do not consider increasing the number of active blocks per SM
  *    would improve locality anymore.
  */
-template <typename OutT, typename LutT>
+template <typename OutT, typename LutT, typename SampleFilterT = NoneSampleFilter>
 auto compute_similarity_select(const cudaDeviceProp& dev_props,
                                bool manage_local_topk,
                                int locality_hint,
@@ -633,7 +670,7 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props,
                                uint32_t precomp_data_count,
                                uint32_t n_queries,
                                uint32_t n_probes,
-                               uint32_t topk) -> selected<OutT, LutT>
+                               uint32_t topk) -> selected<OutT, LutT, SampleFilterT>
 {
   // Shared memory for storing the lookup table
   size_t lut_mem = sizeof(LutT) * (pq_dim << pq_bits);
@@ -705,9 +742,9 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props,
    the minimum number of blocks (just one, really). Then, we tweak the `n_threads` to further
    optimize occupancy and data locality for the L1 cache.
    */
-  auto conf_fast        = get_compute_similarity_kernel<OutT, LutT, true, true>;
-  auto conf_no_basediff = get_compute_similarity_kernel<OutT, LutT, false, true>;
-  auto conf_no_smem_lut = get_compute_similarity_kernel<OutT, LutT, true, false>;
+  auto conf_fast        = get_compute_similarity_kernel<OutT, LutT, true, true, SampleFilterT>;
+  auto conf_no_basediff = get_compute_similarity_kernel<OutT, LutT, false, true, SampleFilterT>;
+  auto conf_no_smem_lut = get_compute_similarity_kernel<OutT, LutT, true, false, SampleFilterT>;
   auto topk_or_zero     = manage_local_topk ? topk : 0u;
   std::array candidates{std::make_tuple(conf_fast(pq_bits, topk_or_zero), lut_mem + bdf_mem, true),
                         std::make_tuple(conf_no_basediff(pq_bits, topk_or_zero), lut_mem, true),
@@ -716,8 +753,8 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props,
   // we may allow slightly lower than 100% occupancy;
   constexpr double kTargetOccupancy = 0.75;
   // This struct is used to select the better candidate
-  occupancy_t<OutT, LutT> selected_perf{};
-  selected<OutT, LutT> selected_config;
+  occupancy_t<OutT, LutT, SampleFilterT> selected_perf{};
+  selected<OutT, LutT, SampleFilterT> selected_config;
   for (auto [kernel, smem_size_const, lut_is_in_shmem] : candidates) {
     if (smem_size_const > dev_props.sharedMemPerBlockOptin) {
       // Even a single block cannot fit into an SM due to shmem requirements. Skip the candidate.
@@ -753,7 +790,7 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props,
       continue;
     }
 
-    occupancy_t<OutT, LutT> cur(smem_size, n_threads, kernel, dev_props);
+    occupancy_t<OutT, LutT, SampleFilterT> cur(smem_size, n_threads, kernel, dev_props);
     if (cur.blocks_per_sm <= 0) {
       // For some reason, we still cannot make this kernel run. Skip the candidate.
       continue;
@@ -768,7 +805,8 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props,
       if (n_threads_tmp < n_threads) {
         while (n_threads_tmp >= n_threads_min) {
           auto smem_size_tmp = max(smem_size_const, ltk_mem(n_threads_tmp));
-          occupancy_t<OutT, LutT> tmp(smem_size_tmp, n_threads_tmp, kernel, dev_props);
+          occupancy_t<OutT, LutT, SampleFilterT> tmp(
+            smem_size_tmp, n_threads_tmp, kernel, dev_props);
           bool select_it = false;
           if (lut_is_in_shmem && locality_hint >= tmp.blocks_per_sm) {
             // Normally, the smaller the block the better for L1 cache hit rate.
diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
index 149ea52b6a..d402a2436b 100644
--- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
+++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh
@@ -23,6 +23,7 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity.cuh>
 #include <raft/neighbors/detail/ivf_pq_dummy_block_sort.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
+#include <raft/neighbors/detail/sample_filter.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>
 
 #include <raft/core/cudart_utils.hpp>
@@ -414,19 +415,21 @@ constexpr inline auto expected_probe_coresidency(uint32_t n_clusters,
  *   3. split the query batch into smaller chunks, so that the device workspace
  *      is guaranteed to fit into GPU memory.
  */
-template <typename ScoreT, typename LutT, typename IdxT>
+template <typename ScoreT, typename LutT, typename SampleFilterT, typename IdxT>
 void ivfpq_search_worker(raft::resources const& handle,
                          const index<IdxT>& index,
                          uint32_t max_samples,
                          uint32_t n_probes,
                          uint32_t topK,
                          uint32_t n_queries,
+                         uint32_t queries_offset,            // needed for filtering
                          const uint32_t* clusters_to_probe,  // [n_queries, n_probes]
                          const float* query,                 // [n_queries, rot_dim]
                          IdxT* neighbors,                    // [n_queries, topK]
                          float* distances,                   // [n_queries, topK]
                          float scaling_factor,
                          double preferred_shmem_carveout,
+                         SampleFilterT sample_filter,
                          rmm::mr::device_memory_resource* mr)
 {
   auto stream = resource::get_cuda_stream(handle);
@@ -529,16 +532,16 @@ void ivfpq_search_worker(raft::resources const& handle,
   }
 
   auto search_instance =
-    compute_similarity_select<ScoreT, LutT>(resource::get_device_properties(handle),
-                                            manage_local_topk,
-                                            coresidency,
-                                            preferred_shmem_carveout,
-                                            index.pq_bits(),
-                                            index.pq_dim(),
-                                            precomp_data_count,
-                                            n_queries,
-                                            n_probes,
-                                            topK);
+    compute_similarity_select<ScoreT, LutT, SampleFilterT>(resource::get_device_properties(handle),
+                                                           manage_local_topk,
+                                                           coresidency,
+                                                           preferred_shmem_carveout,
+                                                           index.pq_bits(),
+                                                           index.pq_dim(),
+                                                           precomp_data_count,
+                                                           n_queries,
+                                                           n_probes,
+                                                           topK);
 
   rmm::device_uvector<LutT> device_lut(search_instance.device_lut_size, stream, mr);
   std::optional<device_vector<float>> query_kths_buf{std::nullopt};
@@ -558,6 +561,7 @@ void ivfpq_search_worker(raft::resources const& handle,
                          n_probes,
                          index.pq_dim(),
                          n_queries,
+                         queries_offset,
                          index.metric(),
                          index.codebook_kind(),
                          topK,
@@ -570,6 +574,7 @@ void ivfpq_search_worker(raft::resources const& handle,
                          query,
                          index_list_sorted,
                          query_kths,
+                         sample_filter,
                          device_lut.data(),
                          distances_buf.data(),
                          neighbors_ptr);
@@ -605,10 +610,10 @@ void ivfpq_search_worker(raft::resources const& handle,
  * This structure helps selecting a proper instance of the worker search function,
  * which contains a few template parameters.
  */
-template <typename IdxT>
+template <typename IdxT, typename SampleFilterT>
 struct ivfpq_search {
  public:
-  using fun_t = decltype(&ivfpq_search_worker<float, float, IdxT>);
+  using fun_t = decltype(&ivfpq_search_worker<float, float, SampleFilterT, IdxT>);
 
   /**
    * Select an instance of the ivf-pq search function based on search tuning parameters,
@@ -624,7 +629,7 @@ struct ivfpq_search {
   static auto filter_reasonable_instances(const search_params& params) -> fun_t
   {
     if constexpr (sizeof(ScoreT) >= sizeof(LutT)) {
-      return ivfpq_search_worker<ScoreT, LutT, IdxT>;
+      return ivfpq_search_worker<ScoreT, LutT, SampleFilterT, IdxT>;
     } else {
       RAFT_FAIL(
         "Unexpected lut_dtype / internal_distance_dtype combination (%d, %d). "
@@ -712,7 +717,7 @@ inline auto get_max_batch_size(uint32_t k,
 }
 
 /** See raft::spatial::knn::ivf_pq::search docs */
-template <typename T, typename IdxT>
+template <typename T, typename IdxT, typename SampleFilterT = NoneSampleFilter>
 inline void search(raft::resources const& handle,
                    const search_params& params,
                    const index<IdxT>& index,
@@ -721,7 +726,8 @@ inline void search(raft::resources const& handle,
                    uint32_t k,
                    IdxT* neighbors,
                    float* distances,
-                   rmm::mr::device_memory_resource* mr = nullptr)
+                   rmm::mr::device_memory_resource* mr = nullptr,
+                   SampleFilterT sample_filter         = SampleFilterT())
 {
   static_assert(std::is_same_v<T, float> || std::is_same_v<T, uint8_t> || std::is_same_v<T, int8_t>,
                 "Unsupported element type.");
@@ -781,7 +787,7 @@ inline void search(raft::resources const& handle,
   rmm::device_uvector<float> rot_queries(max_queries * index.rot_dim(), stream, mr);
   rmm::device_uvector<uint32_t> clusters_to_probe(max_queries * n_probes, stream, mr);
 
-  auto search_instance = ivfpq_search<IdxT>::fun(params, index.metric());
+  auto search_instance = ivfpq_search<IdxT, SampleFilterT>::fun(params, index.metric());
 
   for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) {
     uint32_t queries_batch = min(max_queries, n_queries - offset_q);
@@ -830,12 +836,14 @@ inline void search(raft::resources const& handle,
                       n_probes,
                       k,
                       batch_size,
+                      offset_q + offset_b,
                       clusters_to_probe.data() + uint64_t(n_probes) * offset_b,
                       rot_queries.data() + uint64_t(index.rot_dim()) * offset_b,
                       neighbors + uint64_t(k) * (offset_q + offset_b),
                       distances + uint64_t(k) * (offset_q + offset_b),
                       utils::config<T>::kDivisor / utils::config<float>::kDivisor,
                       params.preferred_shmem_carveout,
+                      sample_filter,
                       mr);
     }
   }
diff --git a/cpp/include/raft/neighbors/detail/sample_filter.cuh b/cpp/include/raft/neighbors/detail/sample_filter.cuh
new file mode 100644
index 0000000000..f5c3d91afe
--- /dev/null
+++ b/cpp/include/raft/neighbors/detail/sample_filter.cuh
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace raft::neighbors::ivf_pq::detail {
+
+/* A filter that filters nothing. This is the default behavior. */
+struct NoneSampleFilter {
+  inline __device__ __host__ bool operator()(
+    // query index
+    const uint32_t query_ix,
+    // the current inverted list index
+    const uint32_t cluster_ix,
+    // the index of the current sample inside the current inverted list
+    const uint32_t sample_ix) const
+  {
+    return true;
+  }
+};
+
+/**
+ * If the filtering depends on the index of a sample, then the following
+ * filter template can be used:
+ *
+ * template <typename IdxT>
+ * struct IndexSampleFilter {
+ *   using index_type = IdxT;
+ *
+ *   const index_type* const* inds_ptr = nullptr;
+ *
+ *   IndexSampleFilter() {}
+ *   IndexSampleFilter(const index_type* const* _inds_ptr)
+ *       : inds_ptr{_inds_ptr} {}
+ *   IndexSampleFilter(const IndexSampleFilter&) = default;
+ *   IndexSampleFilter(IndexSampleFilter&&) = default;
+ *   IndexSampleFilter& operator=(const IndexSampleFilter&) = default;
+ *   IndexSampleFilter& operator=(IndexSampleFilter&&) = default;
+ *
+ *   inline __device__ __host__ bool operator()(
+ *       const uint32_t query_ix,
+ *       const uint32_t cluster_ix,
+ *       const uint32_t sample_ix) const {
+ *     index_type database_idx = inds_ptr[cluster_ix][sample_ix];
+ *
+ *     // return true or false, depending on the database_idx
+ *     return true;
+ *   }
+ * };
+ *
+ * Initialize it as:
+ *   using filter_type = IndexSampleFilter<idx_t>;
+ *   filter_type filter(raft_ivfpq_index.inds_ptrs().data_handle());
+ *
+ * Use it as:
+ *   raft::neighbors::ivf_pq::search_with_filtering<data_t, idx_t, filter_type>(
+ *     ...regular parameters here...,
+ *     filter
+ *   );
+ *
+ * Another example would be the following filter that greenlights samples according
+ * to a contiguous bit mask vector.
+ *
+ * template <typename IdxT>
+ * struct BitMaskSampleFilter {
+ *   using index_type = IdxT;
+ *
+ *   const index_type* const* inds_ptr = nullptr;
+ *   const uint64_t* const bit_mask_ptr = nullptr;
+ *   const int64_t bit_mask_stride_64 = 0;
+ *
+ *   BitMaskSampleFilter() {}
+ *   BitMaskSampleFilter(
+ *       const index_type* const* _inds_ptr,
+ *       const uint64_t* const _bit_mask_ptr,
+ *       const int64_t _bit_mask_stride_64)
+ *       : inds_ptr{_inds_ptr},
+ *         bit_mask_ptr{_bit_mask_ptr},
+ *         bit_mask_stride_64{_bit_mask_stride_64} {}
+ *   BitMaskSampleFilter(const BitMaskSampleFilter&) = default;
+ *   BitMaskSampleFilter(BitMaskSampleFilter&&) = default;
+ *   BitMaskSampleFilter& operator=(const BitMaskSampleFilter&) = default;
+ *   BitMaskSampleFilter& operator=(BitMaskSampleFilter&&) = default;
+ *
+ *   inline __device__ __host__ bool operator()(
+ *       const uint32_t query_ix,
+ *       const uint32_t cluster_ix,
+ *       const uint32_t sample_ix) const {
+ *     const index_type database_idx = inds_ptr[cluster_ix][sample_ix];
+ *     const uint64_t bit_mask_element =
+ *         bit_mask_ptr[query_ix * bit_mask_stride_64 + database_idx / 64];
+ *     const uint64_t masked_bool =
+ *         bit_mask_element & (1ULL << (uint64_t)(database_idx % 64));
+ *     const bool is_bit_set = (masked_bool != 0);
+ *
+ *     return is_bit_set;
+ *   }
+ * };
+ */
+}  // namespace raft::neighbors::ivf_pq::detail
diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
index 42dc776c97..f203709b1b 100644
--- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
@@ -45,6 +45,15 @@ void extend(raft::resources const& handle,
             std::optional<raft::device_vector_view<const IdxT, IdxT, row_major>> new_indices,
             index<IdxT>* idx) RAFT_EXPLICIT;
 
+template <typename T, typename IdxT, typename SampleFilterT>
+void search_with_filtering(raft::resources const& handle,
+                           const search_params& params,
+                           const index<IdxT>& idx,
+                           raft::device_matrix_view<const T, IdxT, row_major> queries,
+                           raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+                           raft::device_matrix_view<float, IdxT, row_major> distances,
+                           SampleFilterT sample_filter) RAFT_EXPLICIT;
+
 template <typename T, typename IdxT>
 void search(raft::resources const& handle,
             const search_params& params,
@@ -74,6 +83,18 @@ void extend(raft::resources const& handle,
             const IdxT* new_indices,
             IdxT n_rows) RAFT_EXPLICIT;
 
+template <typename T, typename IdxT, typename SampleFilterT>
+void search_with_filtering(raft::resources const& handle,
+                           const raft::neighbors::ivf_pq::search_params& params,
+                           const index<IdxT>& idx,
+                           const T* queries,
+                           uint32_t n_queries,
+                           uint32_t k,
+                           IdxT* neighbors,
+                           float* distances,
+                           rmm::mr::device_memory_resource* mr = nullptr,
+                           SampleFilterT sample_filter         = SampleFilterT()) RAFT_EXPLICIT;
+
 template <typename T, typename IdxT>
 void search(raft::resources const& handle,
             const raft::neighbors::ivf_pq::search_params& params,
diff --git a/cpp/include/raft/neighbors/ivf_pq-inl.cuh b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
index 83e7931c78..e2e60f0cd3 100644
--- a/cpp/include/raft/neighbors/ivf_pq-inl.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
@@ -133,7 +133,7 @@ void extend(raft::resources const& handle,
 }
 
 /**
- * @brief Search ANN using the constructed index.
+ * @brief Search ANN using the constructed index with the given filter.
  *
  * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example.
  *
@@ -156,14 +156,16 @@ void extend(raft::resources const& handle,
  * [n_queries, k]
  * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries,
  * k]
+ * @param[in] sample_filter a filter that greenlights samples for a given query.
  */
-template <typename T, typename IdxT>
-void search(raft::resources const& handle,
-            const search_params& params,
-            const index<IdxT>& idx,
-            raft::device_matrix_view<const T, IdxT, row_major> queries,
-            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
-            raft::device_matrix_view<float, IdxT, row_major> distances)
+template <typename T, typename IdxT, typename SampleFilterT>
+void search_with_filtering(raft::resources const& handle,
+                           const search_params& params,
+                           const index<IdxT>& idx,
+                           raft::device_matrix_view<const T, IdxT, row_major> queries,
+                           raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+                           raft::device_matrix_view<float, IdxT, row_major> distances,
+                           SampleFilterT sample_filter = SampleFilterT())
 {
   RAFT_EXPECTS(
     queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0),
@@ -176,15 +178,53 @@ void search(raft::resources const& handle,
                "Number of query dimensions should equal number of dimensions in the index.");
 
   std::uint32_t k = neighbors.extent(1);
-  return detail::search(handle,
-                        params,
-                        idx,
-                        queries.data_handle(),
-                        static_cast<std::uint32_t>(queries.extent(0)),
-                        k,
-                        neighbors.data_handle(),
-                        distances.data_handle(),
-                        resource::get_workspace_resource(handle));
+  detail::search(handle,
+                 params,
+                 idx,
+                 queries.data_handle(),
+                 static_cast<std::uint32_t>(queries.extent(0)),
+                 k,
+                 neighbors.data_handle(),
+                 distances.data_handle(),
+                 resource::get_workspace_resource(handle),
+                 sample_filter);
+}
+
+/**
+ * @brief Search ANN using the constructed index.
+ *
+ * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example.
+ *
+ * Note, this function requires a temporary buffer to store intermediate results between cuda kernel
+ * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can
+ * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or
+ * eliminate entirely allocations happening within `search`.
+ * The exact size of the temporary buffer depends on multiple factors and is an implementation
+ * detail. However, you can safely specify a small initial size for the memory pool, so that only a
+ * few allocations happen to grow it during the first invocations of the `search`.
+ *
+ * @tparam T data element type
+ * @tparam IdxT type of the indices
+ *
+ * @param[in] handle
+ * @param[in] params configure the search
+ * @param[in] idx ivf-pq constructed index
+ * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()]
+ * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset
+ * [n_queries, k]
+ * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries,
+ * k]
+ */
+template <typename T, typename IdxT>
+void search(raft::resources const& handle,
+            const search_params& params,
+            const index<IdxT>& idx,
+            raft::device_matrix_view<const T, IdxT, row_major> queries,
+            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
+            raft::device_matrix_view<float, IdxT, row_major> distances)
+{
+  search_with_filtering(
+    handle, params, idx, queries, neighbors, distances, detail::NoneSampleFilter());
 }
 
 /** @} */  // end group ivf_pq
@@ -297,6 +337,22 @@ void extend(raft::resources const& handle,
   detail::extend(handle, idx, new_vectors, new_indices, n_rows);
 }
 
+template <typename T, typename IdxT, typename SampleFilterT>
+void search_with_filtering(raft::resources const& handle,
+                           const search_params& params,
+                           const index<IdxT>& idx,
+                           const T* queries,
+                           uint32_t n_queries,
+                           uint32_t k,
+                           IdxT* neighbors,
+                           float* distances,
+                           rmm::mr::device_memory_resource* mr = nullptr,
+                           SampleFilterT sample_filter         = SampleFilterT())
+{
+  detail::search(
+    handle, params, idx, queries, n_queries, k, neighbors, distances, mr, sample_filter);
+}
+
 /**
  * @brief Search ANN using the constructed index.
  *
@@ -350,7 +406,7 @@ void search(raft::resources const& handle,
             float* distances,
             rmm::mr::device_memory_resource* mr = nullptr)
 {
-  return detail::search(handle, params, idx, queries, n_queries, k, neighbors, distances, mr);
+  detail::search(handle, params, idx, queries, n_queries, k, neighbors, distances, mr);
 }
 
 }  // namespace raft::neighbors::ivf_pq
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py
index a740d01bd2..ac547626bb 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py
@@ -41,8 +41,8 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT) \\
-    template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \\
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT, SampleFilterT) \\
+    template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \\
         const cudaDeviceProp& dev_props,                                \\
         bool manage_local_topk,                                         \\
         int locality_hint,                                              \\
@@ -52,16 +52,17 @@
         uint32_t precomp_data_count,                                    \\
         uint32_t n_queries,                                             \\
         uint32_t n_probes,                                              \\
-        uint32_t topk) -> raft::neighbors::ivf_pq::detail::selected<OutT, LutT>; \\
+        uint32_t topk) -> raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>; \\
 \\
-    template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>( \\
-        raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,        \\
+    template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>( \\
+        raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,        \\
         rmm::cuda_stream_view stream,                                   \\
         uint32_t n_rows,                                                \\
         uint32_t dim,                                                   \\
         uint32_t n_probes,                                              \\
         uint32_t pq_dim,                                                \\
         uint32_t n_queries,                                             \\
+        uint32_t queries_offset,                                        \\
         raft::distance::DistanceType metric,                                  \\
         raft::neighbors::ivf_pq::codebook_gen codebook_kind,            \\
         uint32_t topk,                                                  \\
@@ -74,6 +75,7 @@
         const float* queries,                                           \\
         const uint32_t* index_list,                                     \\
         float* query_kths,                                              \\
+        SampleFilterT sample_filter,                                    \\
         LutT* lut_scores,                                               \\
         OutT* _out_scores,                                              \\
         uint32_t* _out_indices);
@@ -102,6 +104,6 @@
     path = f"ivf_pq_compute_similarity_{path_key}.cu"
     with open(path, "w") as f:
         f.write(header)
-        f.write(f"instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select({OutT}, {LutT});\n")
+        f.write(f"instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select({OutT}, {LutT}, raft::neighbors::ivf_pq::detail::NoneSampleFilter);\n")
         f.write(trailer)
     print(f"src/neighbors/detail/{path}")
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
index 956b7010d5..67b67df19f 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
@@ -27,46 +27,52 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
-  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                    \
-    bool manage_local_topk,                                                             \
-    int locality_hint,                                                                  \
-    double preferred_shmem_carveout,                                                    \
-    uint32_t pq_bits,                                                                   \
-    uint32_t pq_dim,                                                                    \
-    uint32_t precomp_data_count,                                                        \
-    uint32_t n_queries,                                                                 \
-    uint32_t n_probes,                                                                  \
-    uint32_t topk)                                                                      \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
-                                                                                        \
-  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
-    rmm::cuda_stream_view stream,                                                       \
-    uint32_t n_rows,                                                                    \
-    uint32_t dim,                                                                       \
-    uint32_t n_probes,                                                                  \
-    uint32_t pq_dim,                                                                    \
-    uint32_t n_queries,                                                                 \
-    raft::distance::DistanceType metric,                                                \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
-    uint32_t topk,                                                                      \
-    uint32_t max_samples,                                                               \
-    const float* cluster_centers,                                                       \
-    const float* pq_centers,                                                            \
-    const uint8_t* const* pq_dataset,                                                   \
-    const uint32_t* cluster_labels,                                                     \
-    const uint32_t* _chunk_indices,                                                     \
-    const float* queries,                                                               \
-    const uint32_t* index_list,                                                         \
-    float* query_kths,                                                                  \
-    LutT* lut_scores,                                                                   \
-    OutT* _out_scores,                                                                  \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  template auto                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  template void                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
-instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, float);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float, float, raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
index fba72ad1dd..1c97a1c9ba 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
@@ -27,47 +27,54 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
-  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                    \
-    bool manage_local_topk,                                                             \
-    int locality_hint,                                                                  \
-    double preferred_shmem_carveout,                                                    \
-    uint32_t pq_bits,                                                                   \
-    uint32_t pq_dim,                                                                    \
-    uint32_t precomp_data_count,                                                        \
-    uint32_t n_queries,                                                                 \
-    uint32_t n_probes,                                                                  \
-    uint32_t topk)                                                                      \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
-                                                                                        \
-  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
-    rmm::cuda_stream_view stream,                                                       \
-    uint32_t n_rows,                                                                    \
-    uint32_t dim,                                                                       \
-    uint32_t n_probes,                                                                  \
-    uint32_t pq_dim,                                                                    \
-    uint32_t n_queries,                                                                 \
-    raft::distance::DistanceType metric,                                                \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
-    uint32_t topk,                                                                      \
-    uint32_t max_samples,                                                               \
-    const float* cluster_centers,                                                       \
-    const float* pq_centers,                                                            \
-    const uint8_t* const* pq_dataset,                                                   \
-    const uint32_t* cluster_labels,                                                     \
-    const uint32_t* _chunk_indices,                                                     \
-    const float* queries,                                                               \
-    const uint32_t* index_list,                                                         \
-    float* query_kths,                                                                  \
-    LutT* lut_scores,                                                                   \
-    OutT* _out_scores,                                                                  \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  template auto                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  template void                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+  float,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
index 030f429315..14e2d19fe7 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
@@ -27,47 +27,54 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
-  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                    \
-    bool manage_local_topk,                                                             \
-    int locality_hint,                                                                  \
-    double preferred_shmem_carveout,                                                    \
-    uint32_t pq_bits,                                                                   \
-    uint32_t pq_dim,                                                                    \
-    uint32_t precomp_data_count,                                                        \
-    uint32_t n_queries,                                                                 \
-    uint32_t n_probes,                                                                  \
-    uint32_t topk)                                                                      \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
-                                                                                        \
-  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
-    rmm::cuda_stream_view stream,                                                       \
-    uint32_t n_rows,                                                                    \
-    uint32_t dim,                                                                       \
-    uint32_t n_probes,                                                                  \
-    uint32_t pq_dim,                                                                    \
-    uint32_t n_queries,                                                                 \
-    raft::distance::DistanceType metric,                                                \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
-    uint32_t topk,                                                                      \
-    uint32_t max_samples,                                                               \
-    const float* cluster_centers,                                                       \
-    const float* pq_centers,                                                            \
-    const uint8_t* const* pq_dataset,                                                   \
-    const uint32_t* cluster_labels,                                                     \
-    const uint32_t* _chunk_indices,                                                     \
-    const float* queries,                                                               \
-    const uint32_t* index_list,                                                         \
-    float* query_kths,                                                                  \
-    LutT* lut_scores,                                                                   \
-    OutT* _out_scores,                                                                  \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  template auto                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  template void                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
+  float,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
index 31a4d7d503..7fd3a8d0b2 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
@@ -27,46 +27,52 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
-  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                    \
-    bool manage_local_topk,                                                             \
-    int locality_hint,                                                                  \
-    double preferred_shmem_carveout,                                                    \
-    uint32_t pq_bits,                                                                   \
-    uint32_t pq_dim,                                                                    \
-    uint32_t precomp_data_count,                                                        \
-    uint32_t n_queries,                                                                 \
-    uint32_t n_probes,                                                                  \
-    uint32_t topk)                                                                      \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
-                                                                                        \
-  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
-    rmm::cuda_stream_view stream,                                                       \
-    uint32_t n_rows,                                                                    \
-    uint32_t dim,                                                                       \
-    uint32_t n_probes,                                                                  \
-    uint32_t pq_dim,                                                                    \
-    uint32_t n_queries,                                                                 \
-    raft::distance::DistanceType metric,                                                \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
-    uint32_t topk,                                                                      \
-    uint32_t max_samples,                                                               \
-    const float* cluster_centers,                                                       \
-    const float* pq_centers,                                                            \
-    const uint8_t* const* pq_dataset,                                                   \
-    const uint32_t* cluster_labels,                                                     \
-    const uint32_t* _chunk_indices,                                                     \
-    const float* queries,                                                               \
-    const uint32_t* index_list,                                                         \
-    float* query_kths,                                                                  \
-    LutT* lut_scores,                                                                   \
-    OutT* _out_scores,                                                                  \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  template auto                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  template void                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
-instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(float, half);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  float, half, raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
index c623c80446..01df4d87e3 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
@@ -27,47 +27,54 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
-  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                    \
-    bool manage_local_topk,                                                             \
-    int locality_hint,                                                                  \
-    double preferred_shmem_carveout,                                                    \
-    uint32_t pq_bits,                                                                   \
-    uint32_t pq_dim,                                                                    \
-    uint32_t precomp_data_count,                                                        \
-    uint32_t n_queries,                                                                 \
-    uint32_t n_probes,                                                                  \
-    uint32_t topk)                                                                      \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
-                                                                                        \
-  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
-    rmm::cuda_stream_view stream,                                                       \
-    uint32_t n_rows,                                                                    \
-    uint32_t dim,                                                                       \
-    uint32_t n_probes,                                                                  \
-    uint32_t pq_dim,                                                                    \
-    uint32_t n_queries,                                                                 \
-    raft::distance::DistanceType metric,                                                \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
-    uint32_t topk,                                                                      \
-    uint32_t max_samples,                                                               \
-    const float* cluster_centers,                                                       \
-    const float* pq_centers,                                                            \
-    const uint8_t* const* pq_dataset,                                                   \
-    const uint32_t* cluster_labels,                                                     \
-    const uint32_t* _chunk_indices,                                                     \
-    const float* queries,                                                               \
-    const uint32_t* index_list,                                                         \
-    float* query_kths,                                                                  \
-    LutT* lut_scores,                                                                   \
-    OutT* _out_scores,                                                                  \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  template auto                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  template void                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>);
+  half,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
index f2aaca20db..251515a552 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
@@ -27,47 +27,54 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
-  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                    \
-    bool manage_local_topk,                                                             \
-    int locality_hint,                                                                  \
-    double preferred_shmem_carveout,                                                    \
-    uint32_t pq_bits,                                                                   \
-    uint32_t pq_dim,                                                                    \
-    uint32_t precomp_data_count,                                                        \
-    uint32_t n_queries,                                                                 \
-    uint32_t n_probes,                                                                  \
-    uint32_t topk)                                                                      \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
-                                                                                        \
-  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
-    rmm::cuda_stream_view stream,                                                       \
-    uint32_t n_rows,                                                                    \
-    uint32_t dim,                                                                       \
-    uint32_t n_probes,                                                                  \
-    uint32_t pq_dim,                                                                    \
-    uint32_t n_queries,                                                                 \
-    raft::distance::DistanceType metric,                                                \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
-    uint32_t topk,                                                                      \
-    uint32_t max_samples,                                                               \
-    const float* cluster_centers,                                                       \
-    const float* pq_centers,                                                            \
-    const uint8_t* const* pq_dataset,                                                   \
-    const uint32_t* cluster_labels,                                                     \
-    const uint32_t* _chunk_indices,                                                     \
-    const float* queries,                                                               \
-    const uint32_t* index_list,                                                         \
-    float* query_kths,                                                                  \
-    LutT* lut_scores,                                                                   \
-    OutT* _out_scores,                                                                  \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  template auto                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  template void                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
 instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
-  half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>);
+  half,
+  raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>,
+  raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 
diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
index 4420b2534b..b29f4bca96 100644
--- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
+++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
@@ -27,46 +27,52 @@
 #include <raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh>
 #include <raft/neighbors/detail/ivf_pq_fp_8bit.cuh>
 
-#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT)  \
-  template auto raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT>( \
-    const cudaDeviceProp& dev_props,                                                    \
-    bool manage_local_topk,                                                             \
-    int locality_hint,                                                                  \
-    double preferred_shmem_carveout,                                                    \
-    uint32_t pq_bits,                                                                   \
-    uint32_t pq_dim,                                                                    \
-    uint32_t precomp_data_count,                                                        \
-    uint32_t n_queries,                                                                 \
-    uint32_t n_probes,                                                                  \
-    uint32_t topk)                                                                      \
-    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT>;                            \
-                                                                                        \
-  template void raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT>(    \
-    raft::neighbors::ivf_pq::detail::selected<OutT, LutT> s,                            \
-    rmm::cuda_stream_view stream,                                                       \
-    uint32_t n_rows,                                                                    \
-    uint32_t dim,                                                                       \
-    uint32_t n_probes,                                                                  \
-    uint32_t pq_dim,                                                                    \
-    uint32_t n_queries,                                                                 \
-    raft::distance::DistanceType metric,                                                \
-    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                \
-    uint32_t topk,                                                                      \
-    uint32_t max_samples,                                                               \
-    const float* cluster_centers,                                                       \
-    const float* pq_centers,                                                            \
-    const uint8_t* const* pq_dataset,                                                   \
-    const uint32_t* cluster_labels,                                                     \
-    const uint32_t* _chunk_indices,                                                     \
-    const float* queries,                                                               \
-    const uint32_t* index_list,                                                         \
-    float* query_kths,                                                                  \
-    LutT* lut_scores,                                                                   \
-    OutT* _out_scores,                                                                  \
+#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(              \
+  OutT, LutT, SampleFilterT)                                                             \
+  template auto                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_select<OutT, LutT, SampleFilterT>( \
+    const cudaDeviceProp& dev_props,                                                     \
+    bool manage_local_topk,                                                              \
+    int locality_hint,                                                                   \
+    double preferred_shmem_carveout,                                                     \
+    uint32_t pq_bits,                                                                    \
+    uint32_t pq_dim,                                                                     \
+    uint32_t precomp_data_count,                                                         \
+    uint32_t n_queries,                                                                  \
+    uint32_t n_probes,                                                                   \
+    uint32_t topk)                                                                       \
+    ->raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT>;              \
+                                                                                         \
+  template void                                                                          \
+  raft::neighbors::ivf_pq::detail::compute_similarity_run<OutT, LutT, SampleFilterT>(    \
+    raft::neighbors::ivf_pq::detail::selected<OutT, LutT, SampleFilterT> s,              \
+    rmm::cuda_stream_view stream,                                                        \
+    uint32_t n_rows,                                                                     \
+    uint32_t dim,                                                                        \
+    uint32_t n_probes,                                                                   \
+    uint32_t pq_dim,                                                                     \
+    uint32_t n_queries,                                                                  \
+    uint32_t queries_offset,                                                             \
+    raft::distance::DistanceType metric,                                                 \
+    raft::neighbors::ivf_pq::codebook_gen codebook_kind,                                 \
+    uint32_t topk,                                                                       \
+    uint32_t max_samples,                                                                \
+    const float* cluster_centers,                                                        \
+    const float* pq_centers,                                                             \
+    const uint8_t* const* pq_dataset,                                                    \
+    const uint32_t* cluster_labels,                                                      \
+    const uint32_t* _chunk_indices,                                                      \
+    const float* queries,                                                                \
+    const uint32_t* index_list,                                                          \
+    float* query_kths,                                                                   \
+    SampleFilterT sample_filter,                                                         \
+    LutT* lut_scores,                                                                    \
+    OutT* _out_scores,                                                                   \
     uint32_t* _out_indices);
 
 #define COMMA ,
-instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(half, half);
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(
+  half, half, raft::neighbors::ivf_pq::detail::NoneSampleFilter);
 
 #undef COMMA
 

From a1966456dff8a82eb9429c6e95d805386e3b7857 Mon Sep 17 00:00:00 2001
From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com>
Date: Sat, 20 May 2023 18:34:46 +0200
Subject: [PATCH 71/78] ivf-pq::search: fix the indexing type of the
 query-related mdspan arguments (#1539)

closes https://github.com/rapidsai/raft/issues/1357

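Breaking change: the `queries`, `neighbors`, and `distances` mdspan arguments of `ivf_pq::search` and `ivf_pq::search_with_filtering` now always use `uint32_t` as their index type (the second template parameter of `device_matrix_view`), where they previously used `IdxT`.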

Authors:
  - Artem M. Chirkin (https://github.com/achirkin)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1539
---
 cpp/bench/prims/neighbors/knn.cuh             |  7 +--
 .../neighbors/detail/cagra/cagra_build.cuh    |  6 +--
 cpp/include/raft/neighbors/ivf_pq-ext.cuh     | 48 +++++++++----------
 cpp/include/raft/neighbors/ivf_pq-inl.cuh     | 14 +++---
 .../raft/spatial/knn/detail/ann_quantized.cuh |  6 +--
 .../neighbors/ivfpq_search_float_int64_t.cu   | 36 +++++++-------
 .../neighbors/ivfpq_search_int8_t_int64_t.cu  | 36 +++++++-------
 .../neighbors/ivfpq_search_uint8_t_int64_t.cu | 36 +++++++-------
 cpp/test/neighbors/ann_ivf_pq.cuh             | 10 ++--
 9 files changed, 100 insertions(+), 99 deletions(-)

diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
index 8cdb816dab..e580b20fdc 100644
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ b/cpp/bench/prims/neighbors/knn.cuh
@@ -181,9 +181,10 @@ struct ivf_pq_knn {
   {
     search_params.n_probes = 20;
     auto queries_view =
-      raft::make_device_matrix_view<const ValT, IdxT>(search_items, ps.n_queries, ps.n_dims);
-    auto idxs_view  = raft::make_device_matrix_view<IdxT, IdxT>(out_idxs, ps.n_queries, ps.k);
-    auto dists_view = raft::make_device_matrix_view<dist_t, IdxT>(out_dists, ps.n_queries, ps.k);
+      raft::make_device_matrix_view<const ValT, uint32_t>(search_items, ps.n_queries, ps.n_dims);
+    auto idxs_view = raft::make_device_matrix_view<IdxT, uint32_t>(out_idxs, ps.n_queries, ps.k);
+    auto dists_view =
+      raft::make_device_matrix_view<dist_t, uint32_t>(out_dists, ps.n_queries, ps.k);
     raft::neighbors::ivf_pq::search(
       handle, search_params, *index, queries_view, idxs_view, dists_view);
   }
diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
index d88aaa245a..693ab9029d 100644
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
@@ -140,11 +140,11 @@ void build_knn_graph(raft::resources const& res,
     device_memory);
 
   for (const auto& batch : vec_batches) {
-    auto queries_view = raft::make_device_matrix_view<const DataT, int64_t>(
+    auto queries_view = raft::make_device_matrix_view<const DataT, uint32_t>(
       batch.data(), batch.size(), batch.row_width());
-    auto neighbors_view = make_device_matrix_view<int64_t, int64_t>(
+    auto neighbors_view = make_device_matrix_view<int64_t, uint32_t>(
       neighbors.data_handle(), batch.size(), neighbors.extent(1));
-    auto distances_view = make_device_matrix_view<float, int64_t>(
+    auto distances_view = make_device_matrix_view<float, uint32_t>(
       distances.data_handle(), batch.size(), distances.extent(1));
 
     ivf_pq::search(res, *search_params, index, queries_view, neighbors_view, distances_view);
diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
index f203709b1b..5b7391569b 100644
--- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh
@@ -49,18 +49,18 @@ template <typename T, typename IdxT, typename SampleFilterT>
 void search_with_filtering(raft::resources const& handle,
                            const search_params& params,
                            const index<IdxT>& idx,
-                           raft::device_matrix_view<const T, IdxT, row_major> queries,
-                           raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
-                           raft::device_matrix_view<float, IdxT, row_major> distances,
+                           raft::device_matrix_view<const T, uint32_t, row_major> queries,
+                           raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,
+                           raft::device_matrix_view<float, uint32_t, row_major> distances,
                            SampleFilterT sample_filter) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT>
 void search(raft::resources const& handle,
             const search_params& params,
             const index<IdxT>& idx,
-            raft::device_matrix_view<const T, IdxT, row_major> queries,
-            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
-            raft::device_matrix_view<float, IdxT, row_major> distances) RAFT_EXPLICIT;
+            raft::device_matrix_view<const T, uint32_t, row_major> queries,
+            raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,
+            raft::device_matrix_view<float, uint32_t, row_major> distances) RAFT_EXPLICIT;
 
 template <typename T, typename IdxT = uint32_t>
 auto build(raft::resources const& handle,
@@ -164,24 +164,24 @@ instantiate_raft_neighbors_ivf_pq_extend(uint8_t, int64_t);
 
 #undef instantiate_raft_neighbors_ivf_pq_extend
 
-#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
-  extern template void raft::neighbors::ivf_pq::search<T, IdxT>( \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
-    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
-    raft::device_matrix_view<float, IdxT, row_major> distances); \
-                                                                 \
-  extern template void raft::neighbors::ivf_pq::search<T, IdxT>( \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    const T* queries,                                            \
-    uint32_t n_queries,                                          \
-    uint32_t k,                                                  \
-    IdxT* neighbors,                                             \
-    float* distances,                                            \
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)            \
+  extern template void raft::neighbors::ivf_pq::search<T, IdxT>(     \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    raft::device_matrix_view<const T, uint32_t, row_major> queries,  \
+    raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,   \
+    raft::device_matrix_view<float, uint32_t, row_major> distances); \
+                                                                     \
+  extern template void raft::neighbors::ivf_pq::search<T, IdxT>(     \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    const T* queries,                                                \
+    uint32_t n_queries,                                              \
+    uint32_t k,                                                      \
+    IdxT* neighbors,                                                 \
+    float* distances,                                                \
     rmm::mr::device_memory_resource* mr)
 
 instantiate_raft_neighbors_ivf_pq_search(float, int64_t);
diff --git a/cpp/include/raft/neighbors/ivf_pq-inl.cuh b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
index e2e60f0cd3..fbe2fcb30d 100644
--- a/cpp/include/raft/neighbors/ivf_pq-inl.cuh
+++ b/cpp/include/raft/neighbors/ivf_pq-inl.cuh
@@ -162,9 +162,9 @@ template <typename T, typename IdxT, typename SampleFilterT>
 void search_with_filtering(raft::resources const& handle,
                            const search_params& params,
                            const index<IdxT>& idx,
-                           raft::device_matrix_view<const T, IdxT, row_major> queries,
-                           raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
-                           raft::device_matrix_view<float, IdxT, row_major> distances,
+                           raft::device_matrix_view<const T, uint32_t, row_major> queries,
+                           raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,
+                           raft::device_matrix_view<float, uint32_t, row_major> distances,
                            SampleFilterT sample_filter = SampleFilterT())
 {
   RAFT_EXPECTS(
@@ -182,7 +182,7 @@ void search_with_filtering(raft::resources const& handle,
                  params,
                  idx,
                  queries.data_handle(),
-                 static_cast<std::uint32_t>(queries.extent(0)),
+                 queries.extent(0),
                  k,
                  neighbors.data_handle(),
                  distances.data_handle(),
@@ -219,9 +219,9 @@ template <typename T, typename IdxT>
 void search(raft::resources const& handle,
             const search_params& params,
             const index<IdxT>& idx,
-            raft::device_matrix_view<const T, IdxT, row_major> queries,
-            raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
-            raft::device_matrix_view<float, IdxT, row_major> distances)
+            raft::device_matrix_view<const T, uint32_t, row_major> queries,
+            raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,
+            raft::device_matrix_view<float, uint32_t, row_major> distances)
 {
   search_with_filtering(
     handle, params, idx, queries, neighbors, distances, detail::NoneSampleFilter());
diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh
index 9f0af8c29e..964292f6cb 100644
--- a/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh
@@ -117,9 +117,9 @@ void approx_knn_search(raft::resources const& handle,
     params.n_probes = index->nprobe;
 
     auto query_view =
-      raft::make_device_matrix_view<const T, int64_t>(query_array, n, index->ivf_pq->dim());
-    auto indices_view   = raft::make_device_matrix_view<int64_t, int64_t>(indices, n, k);
-    auto distances_view = raft::make_device_matrix_view<float, int64_t>(distances, n, k);
+      raft::make_device_matrix_view<const T, uint32_t>(query_array, n, index->ivf_pq->dim());
+    auto indices_view   = raft::make_device_matrix_view<int64_t, uint32_t>(indices, n, k);
+    auto distances_view = raft::make_device_matrix_view<float, uint32_t>(distances, n, k);
     neighbors::ivf_pq::search(
       handle, params, *index->ivf_pq, query_view, indices_view, distances_view);
   } else {
diff --git a/cpp/src/neighbors/ivfpq_search_float_int64_t.cu b/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
index 2bcbe22501..e56c107735 100644
--- a/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_float_int64_t.cu
@@ -17,24 +17,24 @@
 #include <raft/neighbors/ivf_pq-inl.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
 
-#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
-  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
-    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
-    raft::device_matrix_view<float, IdxT, row_major> distances); \
-                                                                 \
-  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    const T* queries,                                            \
-    uint32_t n_queries,                                          \
-    uint32_t k,                                                  \
-    IdxT* neighbors,                                             \
-    float* distances,                                            \
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)            \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(            \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    raft::device_matrix_view<const T, uint32_t, row_major> queries,  \
+    raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,   \
+    raft::device_matrix_view<float, uint32_t, row_major> distances); \
+                                                                     \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(            \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    const T* queries,                                                \
+    uint32_t n_queries,                                              \
+    uint32_t k,                                                      \
+    IdxT* neighbors,                                                 \
+    float* distances,                                                \
     rmm::mr::device_memory_resource* mr)
 
 instantiate_raft_neighbors_ivf_pq_search(float, int64_t);
diff --git a/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
index 74432c1963..1efe4f7fb2 100644
--- a/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_int8_t_int64_t.cu
@@ -17,24 +17,24 @@
 #include <raft/neighbors/ivf_pq-inl.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
 
-#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
-  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
-    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
-    raft::device_matrix_view<float, IdxT, row_major> distances); \
-                                                                 \
-  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    const T* queries,                                            \
-    uint32_t n_queries,                                          \
-    uint32_t k,                                                  \
-    IdxT* neighbors,                                             \
-    float* distances,                                            \
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)            \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(            \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    raft::device_matrix_view<const T, uint32_t, row_major> queries,  \
+    raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,   \
+    raft::device_matrix_view<float, uint32_t, row_major> distances); \
+                                                                     \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(            \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    const T* queries,                                                \
+    uint32_t n_queries,                                              \
+    uint32_t k,                                                      \
+    IdxT* neighbors,                                                 \
+    float* distances,                                                \
     rmm::mr::device_memory_resource* mr)
 
 instantiate_raft_neighbors_ivf_pq_search(int8_t, int64_t);
diff --git a/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu b/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
index 8a05263ca0..e746391443 100644
--- a/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
+++ b/cpp/src/neighbors/ivfpq_search_uint8_t_int64_t.cu
@@ -17,24 +17,24 @@
 #include <raft/neighbors/ivf_pq-inl.cuh>
 #include <raft/neighbors/ivf_pq_types.hpp>  // raft::neighbors::ivf_pq::index
 
-#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)        \
-  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    raft::device_matrix_view<const T, IdxT, row_major> queries,  \
-    raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,   \
-    raft::device_matrix_view<float, IdxT, row_major> distances); \
-                                                                 \
-  template void raft::neighbors::ivf_pq::search<T, IdxT>(        \
-    raft::resources const& handle,                               \
-    const raft::neighbors::ivf_pq::search_params& params,        \
-    const raft::neighbors::ivf_pq::index<IdxT>& idx,             \
-    const T* queries,                                            \
-    uint32_t n_queries,                                          \
-    uint32_t k,                                                  \
-    IdxT* neighbors,                                             \
-    float* distances,                                            \
+#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT)            \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(            \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    raft::device_matrix_view<const T, uint32_t, row_major> queries,  \
+    raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors,   \
+    raft::device_matrix_view<float, uint32_t, row_major> distances); \
+                                                                     \
+  template void raft::neighbors::ivf_pq::search<T, IdxT>(            \
+    raft::resources const& handle,                                   \
+    const raft::neighbors::ivf_pq::search_params& params,            \
+    const raft::neighbors::ivf_pq::index<IdxT>& idx,                 \
+    const T* queries,                                                \
+    uint32_t n_queries,                                              \
+    uint32_t k,                                                      \
+    IdxT* neighbors,                                                 \
+    float* distances,                                                \
     rmm::mr::device_memory_resource* mr)
 
 instantiate_raft_neighbors_ivf_pq_search(uint8_t, int64_t);
diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh
index 9a6e310303..de4453a034 100644
--- a/cpp/test/neighbors/ann_ivf_pq.cuh
+++ b/cpp/test/neighbors/ann_ivf_pq.cuh
@@ -405,11 +405,11 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
     rmm::device_uvector<IdxT> indices_ivf_pq_dev(queries_size, stream_);
 
     auto query_view =
-      raft::make_device_matrix_view<DataT, IdxT>(search_queries.data(), ps.num_queries, ps.dim);
-    auto inds_view =
-      raft::make_device_matrix_view<IdxT, IdxT>(indices_ivf_pq_dev.data(), ps.num_queries, ps.k);
-    auto dists_view =
-      raft::make_device_matrix_view<EvalT, IdxT>(distances_ivf_pq_dev.data(), ps.num_queries, ps.k);
+      raft::make_device_matrix_view<DataT, uint32_t>(search_queries.data(), ps.num_queries, ps.dim);
+    auto inds_view = raft::make_device_matrix_view<IdxT, uint32_t>(
+      indices_ivf_pq_dev.data(), ps.num_queries, ps.k);
+    auto dists_view = raft::make_device_matrix_view<EvalT, uint32_t>(
+      distances_ivf_pq_dev.data(), ps.num_queries, ps.k);
 
     ivf_pq::search<DataT, IdxT>(
       handle_, ps.search_params, index, query_view, inds_view, dists_view);

From 26bc95e5293737f128c21c85fa09435a13f20b36 Mon Sep 17 00:00:00 2001
From: Ben Frederickson <github@benfrederickson.com>
Date: Sat, 20 May 2023 19:22:07 -0700
Subject: [PATCH 72/78] remove device_resources include from linalg::map
 (#1540)

Remove the device_resources.cuh include from linalg::map.

For the implicit integration, I don't have libraries like cusolver in the CI environment, and the build is currently failing with errors like

```
/project/_skbuild/linux-x86_64-3.8/cmake-build/_deps/raft-src/cpp/include/raft/core/device_resources.hpp:31:10:
       fatal error: cusolverDn.h: No such file or directory
```

(from https://github.com/benfred/implicit/actions/runs/5033022104/jobs/9026999533?pr=656)

Fix by not including device_resources here

Authors:
  - Ben Frederickson (https://github.com/benfred)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1540
---
 cpp/include/raft/linalg/detail/map.cuh | 3 ++-
 cpp/test/linalg/map.cu                 | 1 +
 cpp/test/neighbors/ann_cagra.cuh       | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/cpp/include/raft/linalg/detail/map.cuh b/cpp/include/raft/linalg/detail/map.cuh
index 40739ab54b..0c79dec248 100644
--- a/cpp/include/raft/linalg/detail/map.cuh
+++ b/cpp/include/raft/linalg/detail/map.cuh
@@ -17,7 +17,6 @@
 #pragma once
 
 #include <raft/core/device_mdspan.hpp>
-#include <raft/core/device_resources.hpp>  // TODO: remove this
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/resources.hpp>
 #include <raft/util/cuda_utils.cuh>
@@ -28,6 +27,8 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
+#include <thrust/tuple.h>
+
 namespace raft::linalg::detail {
 
 template <bool PassOffset, typename OutT, typename IdxT, typename Func, typename... InTs>
diff --git a/cpp/test/linalg/map.cu b/cpp/test/linalg/map.cu
index 97f13c66db..9a88460640 100644
--- a/cpp/test/linalg/map.cu
+++ b/cpp/test/linalg/map.cu
@@ -18,6 +18,7 @@
 #include "unary_op.cuh"
 #include <gtest/gtest.h>
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/core/operators.hpp>
 #include <raft/core/resource/cuda_stream.hpp>
 #include <raft/linalg/eltwise.cuh>
diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh
index 2d161d3794..63c8114de6 100644
--- a/cpp/test/neighbors/ann_cagra.cuh
+++ b/cpp/test/neighbors/ann_cagra.cuh
@@ -22,6 +22,7 @@
 #include <raft_internal/neighbors/naive_knn.cuh>
 
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/neighbors/cagra.cuh>

From 87597a85d74fef611422f85dc632c8c89cde2c3f Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 22 May 2023 13:24:34 -0400
Subject: [PATCH 73/78] Various updates to the docs for 23.06 release (#1538)

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/raft/pull/1538
---
 cpp/include/raft/core/device_span.hpp         | 10 +++-
 cpp/include/raft/core/host_mdarray.hpp        | 12 +++++
 cpp/include/raft/core/host_span.hpp           | 12 ++++-
 cpp/include/raft/core/interruptible.hpp       |  9 ++++
 cpp/include/raft/core/mdarray.hpp             | 20 ++++++++
 cpp/include/raft/core/mdspan.hpp              |  9 ++++
 cpp/include/raft/core/span.hpp                |  9 ++++
 cpp/include/raft/linalg/normalize.cuh         |  2 +-
 cpp/include/raft/random/rng.cuh               | 23 +++++++++
 docs/source/cpp_api/core_interruptible.rst    |  7 +--
 docs/source/cpp_api/core_resources.rst        |  6 ++-
 docs/source/cpp_api/distance_1nn.rst          |  2 +-
 docs/source/cpp_api/mdspan_mdarray.rst        | 24 ++--------
 docs/source/cpp_api/mdspan_representation.rst | 14 +++---
 docs/source/cpp_api/mdspan_span.rst           | 15 ++++--
 docs/source/cpp_api/neighbors.rst             |  3 +-
 docs/source/cpp_api/neighbors_cagra.rst       | 21 ++++++++
 docs/source/cpp_api/random_datagen.rst        |  2 +-
 .../cpp_api/random_sampling_univariate.rst    | 48 ++-----------------
 19 files changed, 160 insertions(+), 88 deletions(-)
 create mode 100644 docs/source/cpp_api/neighbors_cagra.rst

diff --git a/cpp/include/raft/core/device_span.hpp b/cpp/include/raft/core/device_span.hpp
index 0730b20bfb..d3350b5e3a 100644
--- a/cpp/include/raft/core/device_span.hpp
+++ b/cpp/include/raft/core/device_span.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,10 +20,18 @@
 
 namespace raft {
 
+/**
+ * @defgroup device_span one-dimensional device span type
+ * @{
+ */
+
 /**
  * @brief A span class for device pointer.
  */
 template <typename T, size_t extent = std::experimental::dynamic_extent>
 using device_span = span<T, true, extent>;
 
+/**
+ * @}
+ */
 }  // end namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/core/host_mdarray.hpp b/cpp/include/raft/core/host_mdarray.hpp
index 9ba29e38d4..02c8914ca1 100644
--- a/cpp/include/raft/core/host_mdarray.hpp
+++ b/cpp/include/raft/core/host_mdarray.hpp
@@ -67,6 +67,11 @@ template <typename ElementType,
           typename LayoutPolicy = layout_c_contiguous>
 using host_matrix = host_mdarray<ElementType, matrix_extent<IndexType>, LayoutPolicy>;
 
+/**
+ * @defgroup host_mdarray_factories factories to create host mdarrays
+ * @{
+ */
+
 /**
  * @brief Create a host mdarray.
  * @tparam ElementType the data type of the matrix elements
@@ -90,6 +95,10 @@ auto make_host_mdarray(raft::resources& res, extents<IndexType, Extents...> exts
   return mdarray_t{res, layout, policy};
 }
 
+/**
+ * @}
+ */
+
 /**
  * @brief Create a host mdarray.
  * @tparam ElementType the data type of the matrix elements
@@ -117,6 +126,7 @@ auto make_host_mdarray(extents<IndexType, Extents...> exts)
 }
 
 /**
+ * @ingroup host_mdarray_factories
  * @brief Create a 2-dim c-contiguous host mdarray.
  * @tparam ElementType the data type of the matrix elements
  * @tparam IndexType the index type of the extents
@@ -157,6 +167,7 @@ auto make_host_matrix(IndexType n_rows, IndexType n_cols)
 }
 
 /**
+ * @ingroup host_mdarray_factories
  * @brief Create a host scalar from v.
  *
  * @tparam ElementType the data type of the scalar element
@@ -206,6 +217,7 @@ auto make_host_scalar(ElementType const& v)
 }
 
 /**
+ * @ingroup host_mdarray_factories
  * @brief Create a 1-dim host mdarray.
  * @tparam ElementType the data type of the vector elements
  * @tparam IndexType the index type of the extents
diff --git a/cpp/include/raft/core/host_span.hpp b/cpp/include/raft/core/host_span.hpp
index 3cad62b7cd..8b37414e76 100644
--- a/cpp/include/raft/core/host_span.hpp
+++ b/cpp/include/raft/core/host_span.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,10 +19,20 @@
 #include <raft/core/span.hpp>
 
 namespace raft {
+
+/**
+ * @defgroup host_span one-dimensional host span type
+ * @{
+ */
+
 /**
  * @brief A span class for host pointer.
  */
 template <typename T, size_t extent = std::experimental::dynamic_extent>
 using host_span = span<T, false, extent>;
 
+/**
+ * @}
+ */
+
 }  // end namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/core/interruptible.hpp b/cpp/include/raft/core/interruptible.hpp
index 62e481a801..f7351c3411 100644
--- a/cpp/include/raft/core/interruptible.hpp
+++ b/cpp/include/raft/core/interruptible.hpp
@@ -30,6 +30,11 @@
 
 namespace raft {
 
+/**
+ * @defgroup interruptible definitions and classes related to the interruptible API
+ * @{
+ */
+
 /**
  * @brief Exception thrown during `interruptible::synchronize` call when it detects a request
  * to cancel the work performed in this CPU thread.
@@ -297,6 +302,10 @@ class interruptible {
   }
 };
 
+/**
+ * @}
+ */
+
 }  // namespace raft
 
 #endif
diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp
index 5ae0886ce9..7bd5a28a0c 100644
--- a/cpp/include/raft/core/mdarray.hpp
+++ b/cpp/include/raft/core/mdarray.hpp
@@ -32,6 +32,12 @@
 #include <raft/core/resources.hpp>
 
 namespace raft {
+
+/**
+ * @defgroup mdarray multi-dimensional memory-owning type
+ * @{
+ */
+
 /**
  * @brief Interface to implement an owning multi-dimensional array
  *
@@ -207,6 +213,7 @@ class mdarray
     : cp_(cp), map_(m), c_(cp_.create(handle, map_.required_span_size()))
   {
   }
+
   RAFT_MDARRAY_CTOR_CONSTEXPR mdarray(raft::resources const& handle,
                                       mapping_type const& m,
                                       container_policy_type& cp)
@@ -336,6 +343,15 @@ class mdarray
   container_type c_;
 };
 
+/**
+ * @}
+ */
+
+/**
+ * @defgroup mdarray_reshape Flatten and reshape operations on mdarray
+ * @{
+ */
+
 /**
  * @brief Flatten object implementing raft::array_interface into a 1-dim array view
  *
@@ -371,4 +387,8 @@ auto reshape(const array_interface_type& mda, extents<IndexType, Extents...> new
   return reshape(mda.view(), new_shape);
 }
 
+/**
+ * @}
+ */
+
 }  // namespace raft
diff --git a/cpp/include/raft/core/mdspan.hpp b/cpp/include/raft/core/mdspan.hpp
index cd9ca26ed9..e87c76d82d 100644
--- a/cpp/include/raft/core/mdspan.hpp
+++ b/cpp/include/raft/core/mdspan.hpp
@@ -213,6 +213,11 @@ constexpr auto make_extents(Extents... exts)
   return extents<IndexType, ((void)exts, dynamic_extent)...>{exts...};
 }
 
+/**
+ * @defgroup mdspan_reshape Flatten and reshape operations on mdspan
+ * @{
+ */
+
 /**
  * @brief Flatten raft::mdspan into a 1-dim array view
  *
@@ -298,6 +303,10 @@ RAFT_INLINE_FUNCTION auto unravel_index(Idx idx,
   }
 }
 
+/**
+ * @}
+ */
+
 /**
  * @brief Const accessor specialization for default_accessor
  *
diff --git a/cpp/include/raft/core/span.hpp b/cpp/include/raft/core/span.hpp
index a896ba1977..22906580de 100644
--- a/cpp/include/raft/core/span.hpp
+++ b/cpp/include/raft/core/span.hpp
@@ -32,6 +32,11 @@
 #include <type_traits>
 
 namespace raft {
+
+/**
+ * @defgroup span one-dimensional span type
+ * @{
+ */
 /**
  * @brief The span class defined in ISO C++20.  Iterator is defined as plain pointer and
  *        most of the methods have bound check on debug build.
@@ -274,4 +279,8 @@ auto as_writable_bytes(span<T, is_device, E> s) noexcept
 {
   return {reinterpret_cast<std::byte*>(s.data()), s.size_bytes()};
 }
+
+/**
+ * @}
+ */
 }  // namespace raft
diff --git a/cpp/include/raft/linalg/normalize.cuh b/cpp/include/raft/linalg/normalize.cuh
index 86bc597bdc..6169cf1dfa 100644
--- a/cpp/include/raft/linalg/normalize.cuh
+++ b/cpp/include/raft/linalg/normalize.cuh
@@ -26,7 +26,7 @@ namespace raft {
 namespace linalg {
 
 /**
- * @defgroup norm Row- or Col-norm computation
+ * @defgroup normalize Row- or Col-norm computation
  * @{
  */
 
diff --git a/cpp/include/raft/random/rng.cuh b/cpp/include/raft/random/rng.cuh
index c3b44a7577..a946471312 100644
--- a/cpp/include/raft/random/rng.cuh
+++ b/cpp/include/raft/random/rng.cuh
@@ -29,6 +29,11 @@
 
 namespace raft::random {
 
+/**
+ * \defgroup univariate_random_sampling Univariate random sampling
+ * @{
+ */
+
 /**
  * @brief Generate uniformly distributed numbers in the given range
  *
@@ -52,6 +57,10 @@ void uniform(raft::resources const& handle,
     rng_state, out.data_handle(), out.extent(0), start, end, resource::get_cuda_stream(handle));
 }
 
+/**
+ * @}
+ */
+
 /**
  * @brief Legacy overload of `uniform` taking raw pointers
  *
@@ -76,6 +85,7 @@ void uniform(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate uniformly distributed integers in the given range
  *
  * @tparam OutputValueType Integral type; value type of the output vector
@@ -128,6 +138,7 @@ void uniformInt(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate normal distributed numbers
  *   with a given mean and standard deviation
  *
@@ -175,6 +186,7 @@ void normal(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate normal distributed integers
  *
  * @tparam OutputValueType Integral type; value type of the output vector
@@ -228,6 +240,7 @@ void normalInt(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate normal distributed table according to the given set of
  * means and scalar standard deviations.
  *
@@ -326,6 +339,7 @@ void normalTable(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Fill a vector with the given value
  *
  * @tparam OutputValueType Value type of the output vector
@@ -364,6 +378,7 @@ void fill(
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate bernoulli distributed boolean array
  *
  * @tparam OutputValueType Type of each element of the output vector;
@@ -407,6 +422,7 @@ void bernoulli(
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate bernoulli distributed array and applies scale
  *
  * @tparam OutputValueType Data type in which to compute the probabilities
@@ -453,6 +469,7 @@ void scaled_bernoulli(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate Gumbel distributed random numbers
  *
  * @tparam OutputValueType data type of output random number
@@ -501,6 +518,7 @@ void gumbel(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate lognormal distributed numbers
  *
  * @tparam OutputValueType data type of output random number
@@ -547,6 +565,7 @@ void lognormal(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate logistic distributed random numbers
  *
  * @tparam OutputValueType data type of output random number
@@ -593,6 +612,7 @@ void logistic(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate exponentially distributed random numbers
  *
  * @tparam OutputValueType data type of output random number
@@ -632,6 +652,7 @@ void exponential(
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate rayleigh distributed random numbers
  *
  * @tparam OutputValueType data type of output random number
@@ -670,6 +691,7 @@ void rayleigh(
   detail::rayleigh(rng_state, ptr, len, sigma, resource::get_cuda_stream(handle));
 }
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate laplace distributed random numbers
  *
  * @tparam OutputValueType data type of output random number
@@ -716,6 +738,7 @@ void laplace(raft::resources const& handle,
 }
 
 /**
+ * @ingroup univariate_random_sampling
  * @brief Generate random integers, where the probability of i is weights[i]/sum(weights)
  *
  * Usage example:
diff --git a/docs/source/cpp_api/core_interruptible.rst b/docs/source/cpp_api/core_interruptible.rst
index da767cdd6d..3f12f602f8 100644
--- a/docs/source/cpp_api/core_interruptible.rst
+++ b/docs/source/cpp_api/core_interruptible.rst
@@ -10,6 +10,7 @@ Interruptible
 
 namespace *raft::core*
 
-.. doxygenclass:: raft::interruptible
-    :project: RAFT
-    :members:
+.. doxygengroup:: interruptible
+    :project: RAFT
+    :members:
+    :content-only:
diff --git a/docs/source/cpp_api/core_resources.rst b/docs/source/cpp_api/core_resources.rst
index b148e38e44..4f1dd4e5a4 100644
--- a/docs/source/cpp_api/core_resources.rst
+++ b/docs/source/cpp_api/core_resources.rst
@@ -6,7 +6,7 @@ Resources
    :class: highlight
 
 All resources which are specific to a computing environment like host or device are contained within, and managed by,
-raft::resources. This design simplifies the APIs and eases user burden by making them opaque by default but allowing customization based on user preference.
+`raft::resources`. This design simplifies the APIs and eases user burden by making the APIs opaque by default but allowing customization based on user preference.
 
 
 Vocabulary
@@ -25,6 +25,8 @@ namespace *raft::resource*
 Device Resources
 ----------------
 
+`raft::device_resources` is a convenience wrapper around `raft::resources`. It provides accessor methods to retrieve resources such as the CUDA stream, stream pool, and handles to the various CUDA math libraries like cuBLAS and cuSOLVER.
+
 ``#include <raft/core/device_resources.hpp>``
 
 namespace *raft::core*
@@ -81,7 +83,7 @@ CUDA Stream Pool
 
 namespace *raft::resource*
 
-.. doxygengroup:: resource_cuda_stream_pool
+.. doxygengroup:: resource_stream_pool
     :project: RAFT
     :members:
     :content-only:
diff --git a/docs/source/cpp_api/distance_1nn.rst b/docs/source/cpp_api/distance_1nn.rst
index bf99603522..8c1c00d6c9 100644
--- a/docs/source/cpp_api/distance_1nn.rst
+++ b/docs/source/cpp_api/distance_1nn.rst
@@ -17,7 +17,7 @@ namespace *raft::distance*
 ``#include <raft/distance/fused_l2_nn.cuh>``
 namespace *raft::distance*
 
-.. doxygengroup:: masked_l2_nn
+.. doxygengroup:: masked_nn
     :project: RAFT
     :members:
     :content-only:
diff --git a/docs/source/cpp_api/mdspan_mdarray.rst b/docs/source/cpp_api/mdspan_mdarray.rst
index bf9e9e0139..e14fe5a9e3 100644
--- a/docs/source/cpp_api/mdspan_mdarray.rst
+++ b/docs/source/cpp_api/mdspan_mdarray.rst
@@ -7,20 +7,10 @@ mdarray: Multi-dimensional Owning Container
 
 ``#include <raft/core/mdarray.hpp>``
 
-.. doxygenclass:: raft::mdarray
+.. doxygengroup:: mdarray
     :project: RAFT
     :members:
-
-.. doxygenclass:: raft::array_interface
-    :project: RAFT
-    :members:
-
-.. doxygenstruct:: raft::is_array_interface
-    :project: RAFT
-    :members:
-
-.. doxygentypedef:: raft::is_array_interface_t
-    :project RAFT
+    :content-only:
 
 Device Vocabulary
 -----------------
@@ -75,11 +65,7 @@ Host Factories
 
 ``#include <raft/core/host_mdarray.hpp>``
 
-.. doxygenfunction:: raft::make_host_matrix
-    :project: RAFT
-
-.. doxygenfunction:: raft::make_host_vector
-    :project: RAFT
-
-.. doxygenfunction:: raft::make_device_scalar
+.. doxygengroup:: host_mdarray_factories
     :project: RAFT
+    :members:
+    :content-only:
\ No newline at end of file
diff --git a/docs/source/cpp_api/mdspan_representation.rst b/docs/source/cpp_api/mdspan_representation.rst
index fbae03a3e0..f514cf38e0 100644
--- a/docs/source/cpp_api/mdspan_representation.rst
+++ b/docs/source/cpp_api/mdspan_representation.rst
@@ -40,17 +40,15 @@ Shapes
 .. doxygentypedef:: raft::extent_5d
     :project: RAFT
 
-.. doxygenfunction:: raft::flatten(mdspan_type mds)
-    :project: RAFT
-
-.. doxygenfunction:: raft:: flatten(const array_interface_type& mda)
-    :project: RAFT
-
-.. doxygenfunction:: raft::reshape(mdspan_type mds, extents<IndexType, Extents...> new_shape)
+.. doxygengroup:: mdspan_reshape
     :project: RAFT
+    :members:
+    :content-only:
 
-.. doxygenfunction:: raft::reshape(const array_interface_type& mda, extents<IndexType, Extents...> new_shape)
+.. doxygengroup:: mdarray_reshape
     :project: RAFT
+    :members:
+    :content-only:
 
 
 Accessors
diff --git a/docs/source/cpp_api/mdspan_span.rst b/docs/source/cpp_api/mdspan_span.rst
index 2bdaf4941e..870c4329d0 100644
--- a/docs/source/cpp_api/mdspan_span.rst
+++ b/docs/source/cpp_api/mdspan_span.rst
@@ -7,17 +7,22 @@ span: One-dimensional Non-owning View
 
 ``#include <raft/core/span.hpp>``
 
-.. doxygenclass:: raft::span
+.. doxygengroup:: span
     :project: RAFT
     :members:
+    :content-only:
 
 ``#include <raft/core/device_span.hpp>``
 
-.. doxygentypedef:: raft::device_span
-   :project: RAFT
+.. doxygengroup:: device_span
+    :project: RAFT
+    :members:
+    :content-only:
 
 ``#include <raft/core/host_span.hpp>``
 
-.. doxygentypedef:: raft::host_span
-   :project: RAFT
+.. doxygengroup:: host_span
+    :project: RAFT
+    :members:
+    :content-only:
 
diff --git a/docs/source/cpp_api/neighbors.rst b/docs/source/cpp_api/neighbors.rst
index 9d2e762689..876f68b1bf 100644
--- a/docs/source/cpp_api/neighbors.rst
+++ b/docs/source/cpp_api/neighbors.rst
@@ -15,4 +15,5 @@ This page provides C++ class references for the publicly-exposed elements of the
    neighbors_ivf_flat.rst
    neighbors_ivf_pq.rst
    neighbors_epsilon_neighborhood.rst
-   neighbors_ball_cover.rst
\ No newline at end of file
+   neighbors_ball_cover.rst
+   neighbors_cagra.rst
\ No newline at end of file
diff --git a/docs/source/cpp_api/neighbors_cagra.rst b/docs/source/cpp_api/neighbors_cagra.rst
new file mode 100644
index 0000000000..68372bbb71
--- /dev/null
+++ b/docs/source/cpp_api/neighbors_cagra.rst
@@ -0,0 +1,21 @@
+CAGRA
+=====
+
+CAGRA is a graph-based nearest neighbors implementation with state-of-the-art query performance for both small- and large-batch-sized searches.
+
+Please note that the CAGRA implementation is currently experimental and the API is subject to change from release to release. We are currently working on promoting CAGRA to a top-level stable API within RAFT.
+
+.. role:: py(code)
+   :language: c++
+   :class: highlight
+
+``#include <raft/neighbors/cagra.cuh>``
+
+namespace *raft::neighbors::experimental::cagra*
+
+.. doxygengroup:: cagra
+    :project: RAFT
+    :members:
+    :content-only:
+
+
diff --git a/docs/source/cpp_api/random_datagen.rst b/docs/source/cpp_api/random_datagen.rst
index ec23845b6b..a07f5e0154 100644
--- a/docs/source/cpp_api/random_datagen.rst
+++ b/docs/source/cpp_api/random_datagen.rst
@@ -11,7 +11,7 @@ make_blobs
 ``#include <raft/random/make_blobs.cuh>``
 
 namespace *raft::random*
-2
+
 .. doxygengroup:: make_blobs
     :project: RAFT
     :members:
diff --git a/docs/source/cpp_api/random_sampling_univariate.rst b/docs/source/cpp_api/random_sampling_univariate.rst
index ffa58a0d3a..cabad6945a 100644
--- a/docs/source/cpp_api/random_sampling_univariate.rst
+++ b/docs/source/cpp_api/random_sampling_univariate.rst
@@ -9,49 +9,7 @@ Univariate Random Sampling
 
 namespace *raft::random*
 
-.. doxygenfunction:: raft::random::uniform(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType start, OutputValueType end)
+.. doxygengroup:: univariate_random_sampling
     :project: RAFT
-
-.. doxygenfunction:: raft::random::uniformInt(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType start, OutputValueType end)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::normal(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType mu, OutputValueType sigma)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::normalInt(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType mu, OutputValueType sigma)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::normalTable(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<const OutputValueType, IndexType> mu_vec, std::variant<raft::device_vector_view<const OutputValueType, IndexType>, OutputValueType> sigma, raft::device_matrix_view<OutputValueType, IndexType, raft::row_major> out)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::fill(const raft::handle_t& handle, RngState& rng_state, OutputValueType val, raft::device_vector_view<OutputValueType, IndexType> out)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::bernoulli(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, Type prob)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::scaled_bernoulli(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType prob, OutputValueType scale)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::gumbel(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType mu, OutputValueType beta)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::lognormal(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType mu, OutputValueType sigma)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::logistic(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType mu, OutputValueType scale)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::exponential(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType lambda)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::rayleigh(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType sigma)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::laplace(const raft::handle_t& handle, RngState& rng_state, raft::device_vector_view<OutputValueType, IndexType> out, OutputValueType mu, OutputValueType scale)
-    :project: RAFT
-
-.. doxygenfunction:: raft::random::discrete
-    :project: RAFT
-
-
+    :members:
+    :content-only:

From a9dd4405c311d6809702f1d8555d30302a22e2a8 Mon Sep 17 00:00:00 2001
From: Micka <mide@nvidia.com>
Date: Mon, 22 May 2023 21:11:56 +0200
Subject: [PATCH 74/78] [FEA] Add randomized svd from cusolver (#1000)

Authors:
  - Micka (https://github.com/lowener)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1000
---
 .../raft/linalg/detail/cusolver_wrappers.hpp  | 115 +++++++-
 cpp/include/raft/linalg/detail/rsvd.cuh       |  90 +++++++
 cpp/include/raft/linalg/rsvd.cuh              | 146 +++++++++--
 cpp/include/raft/linalg/transpose.cuh         |   2 +-
 cpp/test/CMakeLists.txt                       |   1 +
 cpp/test/linalg/randomized_svd.cu             | 245 ++++++++++++++++++
 6 files changed, 579 insertions(+), 20 deletions(-)
 create mode 100644 cpp/test/linalg/randomized_svd.cu

diff --git a/cpp/include/raft/linalg/detail/cusolver_wrappers.hpp b/cpp/include/raft/linalg/detail/cusolver_wrappers.hpp
index 3eff920dd8..79fd869083 100644
--- a/cpp/include/raft/linalg/detail/cusolver_wrappers.hpp
+++ b/cpp/include/raft/linalg/detail/cusolver_wrappers.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -693,6 +693,119 @@ inline cusolverStatus_t CUSOLVERAPI cusolverDngesvdj(  // NOLINT
   return cusolverDnDgesvdj(
     handle, jobz, econ, m, n, A, lda, S, U, ldu, V, ldv, work, lwork, info, params);
 }
+
+#if CUDART_VERSION >= 11010
+template <typename T>  // float -> CUDA_R_32F, otherwise CUDA_R_64F
+cusolverStatus_t cusolverDnxgesvdr_bufferSize(  // NOLINT
+  cusolverDnHandle_t handle,
+  signed char jobu,
+  signed char jobv,
+  int64_t m,
+  int64_t n,
+  int64_t k,
+  int64_t p,
+  int64_t niters,
+  const T* a,
+  int64_t lda,
+  const T* Srand,
+  const T* Urand,
+  int64_t ldUrand,
+  const T* Vrand,
+  int64_t ldVrand,
+  size_t* workspaceInBytesOnDevice,  // out: device workspace size, filled in by cuSOLVER
+  size_t* workspaceInBytesOnHost,    // out: host workspace size, filled in by cuSOLVER
+  cudaStream_t stream)               // bound to `handle` before the query is issued
+{
+  RAFT_EXPECTS(std::is_floating_point_v<T>, "Unsupported data type");
+  cudaDataType dataType = std::is_same_v<T, float> ? CUDA_R_32F : CUDA_R_64F;
+  RAFT_CUSOLVER_TRY(cusolverDnSetStream(handle, stream));
+  cusolverDnParams_t dn_params = nullptr;  // temporary params object, destroyed below
+  RAFT_CUSOLVER_TRY(cusolverDnCreateParams(&dn_params));
+  auto result = cusolverDnXgesvdr_bufferSize(handle,
+                                             dn_params,
+                                             jobu,
+                                             jobv,
+                                             m,
+                                             n,
+                                             k,
+                                             p,
+                                             niters,
+                                             dataType,  // data type of a
+                                             a,
+                                             lda,
+                                             dataType,  // data type of Srand
+                                             Srand,
+                                             dataType,  // data type of Urand
+                                             Urand,
+                                             ldUrand,
+                                             dataType,  // data type of Vrand
+                                             Vrand,
+                                             ldVrand,
+                                             dataType,  // compute type
+                                             workspaceInBytesOnDevice,
+                                             workspaceInBytesOnHost);
+  RAFT_CUSOLVER_TRY(cusolverDnDestroyParams(dn_params));
+  return result;  // raw cuSOLVER status of the query; not checked here
+}
+template <typename T>  // float -> CUDA_R_32F, otherwise CUDA_R_64F
+cusolverStatus_t cusolverDnxgesvdr(  // NOLINT
+  cusolverDnHandle_t handle,
+  signed char jobu,
+  signed char jobv,
+  int64_t m,
+  int64_t n,
+  int64_t k,
+  int64_t p,
+  int64_t niters,
+  T* a,
+  int64_t lda,
+  T* Srand,
+  T* Urand,
+  int64_t ldUrand,
+  T* Vrand,
+  int64_t ldVrand,
+  void* bufferOnDevice,
+  size_t workspaceInBytesOnDevice,  // size of bufferOnDevice in bytes
+  void* bufferOnHost,
+  size_t workspaceInBytesOnHost,    // size of bufferOnHost in bytes
+  int* d_info,                      // out: status word written by cuSOLVER
+  cudaStream_t stream)              // bound to `handle` before the solve is issued
+{
+  cudaDataType dataType = std::is_same_v<T, float> ? CUDA_R_32F : CUDA_R_64F;
+  RAFT_CUSOLVER_TRY(cusolverDnSetStream(handle, stream));
+  cusolverDnParams_t dn_params = nullptr;  // temporary params object, destroyed below
+  RAFT_CUSOLVER_TRY(cusolverDnCreateParams(&dn_params));
+  auto result = cusolverDnXgesvdr(handle,
+                                  dn_params,
+                                  jobu,
+                                  jobv,
+                                  m,
+                                  n,
+                                  k,
+                                  p,
+                                  niters,
+                                  dataType,  // data type of a
+                                  a,
+                                  lda,
+                                  dataType,  // data type of Srand
+                                  Srand,
+                                  dataType,  // data type of Urand
+                                  Urand,
+                                  ldUrand,
+                                  dataType,  // data type of Vrand
+                                  Vrand,
+                                  ldVrand,
+                                  dataType,  // compute type
+                                  bufferOnDevice,
+                                  workspaceInBytesOnDevice,
+                                  bufferOnHost,
+                                  workspaceInBytesOnHost,
+                                  d_info);
+  RAFT_CUSOLVER_TRY(cusolverDnDestroyParams(dn_params));
+  return result;  // raw cuSOLVER status of the solve; not checked here
+}
+#endif  // CUDART_VERSION >= 11010
+
 /** @} */
 
 /**
diff --git a/cpp/include/raft/linalg/detail/rsvd.cuh b/cpp/include/raft/linalg/detail/rsvd.cuh
index 50cb339ea1..9c2cea6b66 100644
--- a/cpp/include/raft/linalg/detail/rsvd.cuh
+++ b/cpp/include/raft/linalg/detail/rsvd.cuh
@@ -37,6 +37,96 @@ namespace raft {
 namespace linalg {
 namespace detail {
 
+template <typename math_t>  // raw-pointer implementation behind raft::linalg::randomized_svd
+void randomized_svd(const raft::device_resources& handle,
+                    const math_t* in,
+                    std::size_t n_rows,
+                    std::size_t n_cols,
+                    std::size_t k,
+                    std::size_t p,
+                    std::size_t niters,
+                    math_t* S,
+                    math_t* U,
+                    math_t* V,
+                    bool gen_U,
+                    bool gen_V)
+{
+  common::nvtx::range<common::nvtx::domain::raft> fun_scope(
+    "raft::linalg::randomized_svd(%zu, %zu, %zu)", n_rows, n_cols, k);  // %zu: std::size_t args
+
+  RAFT_EXPECTS(k < std::min(n_rows, n_cols), "k must be < min(n_rows, n_cols)");
+  RAFT_EXPECTS((k + p) < std::min(n_rows, n_cols), "k + p must be < min(n_rows, n_cols)");
+  RAFT_EXPECTS(!gen_U || (U != nullptr), "computation of U vector requested but found nullptr");
+  RAFT_EXPECTS(!gen_V || (V != nullptr), "computation of V vector requested but found nullptr");
+#if CUDART_VERSION < 11050
+  RAFT_EXPECTS(gen_U && gen_V, "not computing U or V is not supported in CUDA version < 11.5");
+#endif
+  cudaStream_t stream          = handle.get_stream();
+  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+
+  char jobu = gen_U ? 'S' : 'N';  // 'S': compute the left vectors, 'N': skip them
+  char jobv = gen_V ? 'S' : 'N';  // 'S': compute the right vectors, 'N': skip them
+
+  auto lda     = n_rows;  // leading dimensions below assume unpadded column-major buffers
+  auto ldu     = n_rows;
+  auto ldv     = n_cols;
+  auto* in_ptr = const_cast<math_t*>(in);  // the solver wrapper takes a mutable pointer
+
+  size_t workspaceDevice = 0;  // both sizes are filled in by the buffer-size query below
+  size_t workspaceHost   = 0;
+  RAFT_CUSOLVER_TRY(cusolverDnxgesvdr_bufferSize(cusolverH,
+                                                 jobu,
+                                                 jobv,
+                                                 n_rows,
+                                                 n_cols,
+                                                 k,
+                                                 p,
+                                                 niters,
+                                                 in_ptr,
+                                                 lda,
+                                                 S,
+                                                 U,
+                                                 ldu,
+                                                 V,
+                                                 ldv,
+                                                 &workspaceDevice,
+                                                 &workspaceHost,
+                                                 stream));
+
+  auto d_workspace = raft::make_device_vector<char>(handle, workspaceDevice);
+  auto h_workspace = raft::make_host_vector<char>(workspaceHost);
+  auto devInfo     = raft::make_device_scalar<int>(handle, 0);  // solver status, starts at 0
+
+  RAFT_CUSOLVER_TRY(cusolverDnxgesvdr(cusolverH,
+                                      jobu,
+                                      jobv,
+                                      n_rows,
+                                      n_cols,
+                                      k,
+                                      p,
+                                      niters,
+                                      in_ptr,
+                                      lda,
+                                      S,
+                                      U,
+                                      ldu,
+                                      V,
+                                      ldv,
+                                      d_workspace.data_handle(),
+                                      workspaceDevice,
+                                      h_workspace.data_handle(),
+                                      workspaceHost,
+                                      devInfo.data_handle(),
+                                      stream));
+
+  RAFT_CUDA_TRY(cudaGetLastError());
+
+  int dev_info;  // host copy of the status word written by the solver
+  raft::update_host(&dev_info, devInfo.data_handle(), 1, stream);
+  handle.sync_stream(stream);  // make sure dev_info has arrived before it is checked
+  ASSERT(dev_info == 0, "rsvd.cuh: Invalid parameter encountered.");
+}
+
 /**
  * @brief randomized singular value decomposition (RSVD) on the column major
  * float type input matrix (Jacobi-based), by specifying no. of PCs and
diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh
index 4a6c058061..8037611a54 100644
--- a/cpp/include/raft/linalg/rsvd.cuh
+++ b/cpp/include/raft/linalg/rsvd.cuh
@@ -19,9 +19,8 @@
 #pragma once
 
 #include "detail/rsvd.cuh"
-#include <raft/core/resource/cuda_stream.hpp>
-
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 
 namespace raft {
 namespace linalg {
@@ -176,16 +175,20 @@ void rsvd_fixed_rank(raft::resources const& handle,
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
+    U_ptr = U.value().data_handle();
   }
   if (V) {
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
+    V_ptr = V.value().data_handle();
   }
 
   rsvdFixedRank(handle,
@@ -193,8 +196,8 @@ void rsvd_fixed_rank(raft::resources const& handle,
                 M.extent(0),
                 M.extent(1),
                 S_vec.data_handle(),
-                U.value().data_handle(),
-                V.value().data_handle(),
+                U_ptr,
+                V_ptr,
                 S_vec.extent(0),
                 p,
                 false,
@@ -251,13 +254,17 @@ void rsvd_fixed_rank_symmetric(
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
+    U_ptr = U.value().data_handle();
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
   }
   if (V) {
+    V_ptr = V.value().data_handle();
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
@@ -268,8 +275,8 @@ void rsvd_fixed_rank_symmetric(
                 M.extent(0),
                 M.extent(1),
                 S_vec.data_handle(),
-                U.value().data_handle(),
-                V.value().data_handle(),
+                U_ptr,
+                V_ptr,
                 S_vec.extent(0),
                 p,
                 true,
@@ -329,13 +336,17 @@ void rsvd_fixed_rank_jacobi(raft::resources const& handle,
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
+    U_ptr = U.value().data_handle();
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
   }
   if (V) {
+    V_ptr = V.value().data_handle();
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
@@ -346,8 +357,8 @@ void rsvd_fixed_rank_jacobi(raft::resources const& handle,
                 M.extent(0),
                 M.extent(1),
                 S_vec.data_handle(),
-                U.value().data_handle(),
-                V.value().data_handle(),
+                U_ptr,
+                V_ptr,
                 S_vec.extent(0),
                 p,
                 false,
@@ -408,13 +419,17 @@ void rsvd_fixed_rank_symmetric_jacobi(
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
+    U_ptr = U.value().data_handle();
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
   }
   if (V) {
+    V_ptr = V.value().data_handle();
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
@@ -425,8 +440,8 @@ void rsvd_fixed_rank_symmetric_jacobi(
                 M.extent(0),
                 M.extent(1),
                 S_vec.data_handle(),
-                U.value().data_handle(),
-                V.value().data_handle(),
+                U_ptr,
+                V_ptr,
                 S_vec.extent(0),
                 p,
                 true,
@@ -484,13 +499,17 @@ void rsvd_perc(raft::resources const& handle,
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
+    U_ptr = U.value().data_handle();
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
   }
   if (V) {
+    V_ptr = V.value().data_handle();
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
@@ -501,8 +520,8 @@ void rsvd_perc(raft::resources const& handle,
            M.extent(0),
            M.extent(1),
            S_vec.data_handle(),
-           U.value().data_handle(),
-           V.value().data_handle(),
+           U_ptr,
+           V_ptr,
            PC_perc,
            UpS_perc,
            false,
@@ -560,13 +579,17 @@ void rsvd_perc_symmetric(raft::resources const& handle,
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
+    U_ptr = U.value().data_handle();
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
   }
   if (V) {
+    V_ptr = V.value().data_handle();
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
@@ -577,8 +600,8 @@ void rsvd_perc_symmetric(raft::resources const& handle,
            M.extent(0),
            M.extent(1),
            S_vec.data_handle(),
-           U.value().data_handle(),
-           V.value().data_handle(),
+           U_ptr,
+           V_ptr,
            PC_perc,
            UpS_perc,
            true,
@@ -640,13 +663,17 @@ void rsvd_perc_jacobi(raft::resources const& handle,
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
+    U_ptr = U.value().data_handle();
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
   }
   if (V) {
+    V_ptr = V.value().data_handle();
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
@@ -657,8 +684,8 @@ void rsvd_perc_jacobi(raft::resources const& handle,
            M.extent(0),
            M.extent(1),
            S_vec.data_handle(),
-           U.value().data_handle(),
-           V.value().data_handle(),
+           U_ptr,
+           V_ptr,
            PC_perc,
            UpS_perc,
            false,
@@ -721,13 +748,17 @@ void rsvd_perc_symmetric_jacobi(
     std::forward<UType>(U_in);
   std::optional<raft::device_matrix_view<ValueType, IndexType, raft::col_major>> V =
     std::forward<VType>(V_in);
+  ValueType* U_ptr = nullptr;
+  ValueType* V_ptr = nullptr;
 
   if (U) {
+    U_ptr = U.value().data_handle();
     RAFT_EXPECTS(M.extent(0) == U.value().extent(0), "Number of rows in M should be equal to U");
     RAFT_EXPECTS(S_vec.extent(0) == U.value().extent(1),
                  "Number of columns in U should be equal to length of S");
   }
   if (V) {
+    V_ptr = V.value().data_handle();
     RAFT_EXPECTS(M.extent(1) == V.value().extent(1), "Number of columns in M should be equal to V");
     RAFT_EXPECTS(S_vec.extent(0) == V.value().extent(0),
                  "Number of rows in V should be equal to length of S");
@@ -738,8 +769,8 @@ void rsvd_perc_symmetric_jacobi(
            M.extent(0),
            M.extent(1),
            S_vec.data_handle(),
-           U.value().data_handle(),
-           V.value().data_handle(),
+           U_ptr,
+           V_ptr,
            PC_perc,
            UpS_perc,
            true,
@@ -764,6 +795,85 @@ void rsvd_perc_symmetric_jacobi(Args... args)
   rsvd_perc_symmetric_jacobi(std::forward<Args>(args)..., std::nullopt, std::nullopt);
 }
 
+/**
+ * @brief randomized singular value decomposition (RSVD) using cuSOLVER
+ * @tparam math_t the data type
+ * @tparam idx_t index type
+ * @param[in]  handle:  raft handle
+ * @param[in]  in:      input matrix in col-major format.
+ *                      Warning: the content of this matrix is modified by the cuSOLVER routines.
+ *                      [dim = n_rows * n_cols]
+ * @param[out] S:       array of singular values of input matrix. The rank k must be less than
+ * min(n_rows, n_cols). [dim = k]
+ * @param[out] U:       optional left singular vectors of input matrix. Use std::nullopt to not
+ * generate them. [dim = n_rows * k]
+ * @param[out] V:       optional right singular vectors of input matrix. Use std::nullopt to not
+ * generate them. [dim = k * n_cols]
+ * @param[in]  p:       Oversampling. The size of the subspace will be (k + p). (k+p) must be less
+ * than min(n_rows, n_cols). (Recommended to be at least 2*k)
+ * @param[in]  niters:  Number of iterations of the power method. (2 is recommended)
+ */
+template <typename math_t, typename idx_t>
+void randomized_svd(const raft::device_resources& handle,
+                    raft::device_matrix_view<const math_t, idx_t, raft::col_major> in,
+                    raft::device_vector_view<math_t, idx_t> S,
+                    std::optional<raft::device_matrix_view<math_t, idx_t, raft::col_major>> U,
+                    std::optional<raft::device_matrix_view<math_t, idx_t, raft::col_major>> V,
+                    std::size_t p,
+                    std::size_t niters)
+{
+  auto k                      = S.extent(0);  // requested rank = length of S
+  math_t* left_sing_vecs_ptr  = nullptr;      // stays null when U is not requested
+  math_t* right_sing_vecs_ptr = nullptr;      // stays null when V is not requested
+  auto gen_U                  = U.has_value();
+  auto gen_V                  = V.has_value();
+  if (gen_U) {
+    RAFT_EXPECTS(in.extent(0) == U.value().extent(0) && k == U.value().extent(1),
+                 "U should have dimensions n_rows * k");
+    left_sing_vecs_ptr = U.value().data_handle();
+  }
+  if (gen_V) {
+    RAFT_EXPECTS(k == V.value().extent(0) && in.extent(1) == V.value().extent(1),
+                 "V should have dimensions k * n_cols");
+    right_sing_vecs_ptr = V.value().data_handle();
+  }
+  detail::randomized_svd(handle,
+                         in.data_handle(),
+                         in.extent(0),
+                         in.extent(1),
+                         k,
+                         p,
+                         niters,
+                         S.data_handle(),
+                         left_sing_vecs_ptr,
+                         right_sing_vecs_ptr,
+                         gen_U,
+                         gen_V);
+}
+
+/**
+ * @brief Overload of `randomized_svd` to help the
+ *   compiler find the above overload, in case users pass in
+ *   `std::nullopt` for the optional arguments.
+ *
+ * U and V are converted to `std::optional` views. See above for the parameter documentation.
+ */
+template <typename math_t, typename idx_t, typename opt_u_vec_t, typename opt_v_vec_t>
+void randomized_svd(const raft::device_resources& handle,
+                    raft::device_matrix_view<const math_t, idx_t, raft::col_major> in,
+                    raft::device_vector_view<math_t, idx_t> S,
+                    opt_u_vec_t&& U,
+                    opt_v_vec_t&& V,
+                    std::size_t p,
+                    std::size_t niters)
+{
+  std::optional<raft::device_matrix_view<math_t, idx_t, raft::col_major>> opt_u =
+    std::forward<opt_u_vec_t>(U);  // whatever was passed for U becomes an optional view
+  std::optional<raft::device_matrix_view<math_t, idx_t, raft::col_major>> opt_v =
+    std::forward<opt_v_vec_t>(V);  // whatever was passed for V becomes an optional view
+  randomized_svd(handle, in, S, opt_u, opt_v, p, niters);
+}
+
 /** @} */  // end of group rsvd
 
 };         // end namespace linalg
diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index 0fe752347d..afe1962223 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -74,7 +74,7 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
  *
  * @param[in]  handle raft handle for managing expensive cuda resources.
  * @param[in]  in     Input matrix.
- * @param[out] out    Output matirx, storage is pre-allocated by caller.
+ * @param[out] out    Output matrix, storage is pre-allocated by caller.
  */
 template <typename T, typename IndexType, typename LayoutPolicy, typename AccessorPolicy>
 auto transpose(raft::resources const& handle,
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 1b4d269d1b..871869102c 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -215,6 +215,7 @@ if(BUILD_TESTS)
     test/linalg/norm.cu
     test/linalg/normalize.cu
     test/linalg/power.cu
+    test/linalg/randomized_svd.cu
     test/linalg/reduce.cu
     test/linalg/reduce_cols_by_key.cu
     test/linalg/reduce_rows_by_key.cu
diff --git a/cpp/test/linalg/randomized_svd.cu b/cpp/test/linalg/randomized_svd.cu
new file mode 100644
index 0000000000..2d55fd7579
--- /dev/null
+++ b/cpp/test/linalg/randomized_svd.cu
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.cuh"
+#include <gtest/gtest.h>
+#include <raft/linalg/rsvd.cuh>
+#include <raft/linalg/svd.cuh>
+#include <raft/matrix/diagonal.cuh>
+#include <raft/matrix/matrix.cuh>
+#include <raft/util/cuda_utils.cuh>
+#include <raft/util/cudart_utils.hpp>
+
+namespace raft {
+namespace linalg {
+// Parameters describing one randomized_svd test case.
+template <typename T>
+struct randomized_svdInputs {
+  T tolerance;                  // absolute tolerance given to CompareApproxAbs in the Result tests
+  int n_row;                    // row count of the input matrix view
+  int n_col;                    // column count of the input matrix view
+  int k;                        // length of the S view; inner extent of the U and V views
+  unsigned long long int seed;  // not read anywhere in this test file
+};
+// Stream-insertion overload for the parameter struct; writes nothing and returns os as-is.
+template <typename T>
+::std::ostream& operator<<(::std::ostream& os, const randomized_svdInputs<T>& dims)
+{
+  return os;
+}
+// Fixture: SetUp() runs randomized_svd on a hard-coded 5x5 input and uploads reference results.
+template <typename T>
+class randomized_svdTest : public ::testing::TestWithParam<randomized_svdInputs<T>> {
+ public:
+  randomized_svdTest()  // members initialize in declaration order, not the order listed below
+    : params(::testing::TestWithParam<randomized_svdInputs<T>>::GetParam()),
+      stream(handle.get_stream()),
+      data(params.n_row * params.n_col, stream),
+      reconst(params.n_row * params.n_col, stream),
+      left_eig_vectors_act(params.n_row * params.k, stream),
+      right_eig_vectors_act(params.k * params.n_col, stream),
+      sing_vals_act(params.k, stream),
+      left_eig_vectors_ref(params.n_row * params.n_col, stream),
+      right_eig_vectors_ref(params.n_col * params.n_col, stream),
+      sing_vals_ref(params.k, stream)
+  {
+  }
+
+ protected:
+  void basicTest()  // computes S, U and V; runs last in SetUp(), so the Result tests see its output
+  {
+    int len = params.n_row * params.n_col;
+    ASSERT(params.n_row == 5 && params.n_col == 5, "This test only supports nrows=5 && ncols=5!");
+    T data_h[] = {0.76420743, 0.61411544, 0.81724151, 0.42040879, 0.03446089,
+                  0.03697287, 0.85962444, 0.67584086, 0.45594666, 0.02074835,
+                  0.42018265, 0.39204509, 0.12657948, 0.90250559, 0.23076218,
+                  0.50339844, 0.92974961, 0.21213988, 0.63962457, 0.58124562,
+                  0.58325673, 0.11589871, 0.39831112, 0.21492685, 0.00540355};
+    raft::update_device(data.data(), data_h, len, stream);
+
+    T left_eig_vectors_ref_h[] = {0.42823088,
+                                  0.59131151,
+                                  0.4220887,
+                                  0.50441194,
+                                  0.18541506,
+                                  0.27047497,
+                                  -0.17195579,
+                                  0.69362791,
+                                  -0.43253894,
+                                  -0.47860724};
+
+    T right_eig_vectors_ref_h[] = {0.53005494,
+                                   0.44104121,
+                                   0.40720732,
+                                   0.54337293,
+                                   0.25189773,
+                                   0.5789401,
+                                   0.15264214,
+                                   -0.45215699,
+                                   -0.53184873,
+                                   0.3927082};
+
+    T sing_vals_ref_h[] = {2.36539241, 0.81117785, 0.68562255, 0.41390509, 0.01519322};
+    // Only the first n_row * k, k * n_col and k reference entries are copied to the device.
+    raft::update_device(
+      left_eig_vectors_ref.data(), left_eig_vectors_ref_h, params.n_row * params.k, stream);
+    raft::update_device(
+      right_eig_vectors_ref.data(), right_eig_vectors_ref_h, params.k * params.n_col, stream);
+    raft::update_device(sing_vals_ref.data(), sing_vals_ref_h, params.k, stream);
+    // Both U and V are requested; the trailing 2, 2 are the p and niters arguments.
+    randomized_svd(handle,
+                   raft::make_device_matrix_view<const T, uint32_t, raft::col_major>(
+                     data.data(), params.n_row, params.n_col),
+                   raft::make_device_vector_view<T, uint32_t>(sing_vals_act.data(), params.k),
+                   std::make_optional(raft::make_device_matrix_view<T, uint32_t, raft::col_major>(
+                     left_eig_vectors_act.data(), params.n_row, params.k)),
+                   std::make_optional(raft::make_device_matrix_view<T, uint32_t, raft::col_major>(
+                     right_eig_vectors_act.data(), params.k, params.n_col)),
+                   2,
+                   2);
+    handle.sync_stream(stream);
+  }
+
+  void apiTest()  // calls randomized_svd with U and/or V passed as std::nullopt; asserts no outputs
+  {
+    int len = params.n_row * params.n_col;
+    ASSERT(params.n_row == 5 && params.n_col == 5, "This test only supports nrows=5 && ncols=5!");
+    T data_h[] = {0.76420743, 0.61411544, 0.81724151, 0.42040879, 0.03446089,
+                  0.03697287, 0.85962444, 0.67584086, 0.45594666, 0.02074835,
+                  0.42018265, 0.39204509, 0.12657948, 0.90250559, 0.23076218,
+                  0.50339844, 0.92974961, 0.21213988, 0.63962457, 0.58124562,
+                  0.58325673, 0.11589871, 0.39831112, 0.21492685, 0.00540355};
+    raft::update_device(data.data(), data_h, len, stream);
+
+    T left_eig_vectors_ref_h[] = {0.42823088,
+                                  0.59131151,
+                                  0.4220887,
+                                  0.50441194,
+                                  0.18541506,
+                                  0.27047497,
+                                  -0.17195579,
+                                  0.69362791,
+                                  -0.43253894,
+                                  -0.47860724};
+
+    T right_eig_vectors_ref_h[] = {0.53005494,
+                                   0.44104121,
+                                   0.40720732,
+                                   0.54337293,
+                                   0.25189773,
+                                   0.5789401,
+                                   0.15264214,
+                                   -0.45215699,
+                                   -0.53184873,
+                                   0.3927082};
+
+    T sing_vals_ref_h[] = {2.36539241, 0.81117785, 0.68562255, 0.41390509, 0.01519322};
+
+    raft::update_device(
+      left_eig_vectors_ref.data(), left_eig_vectors_ref_h, params.n_row * params.k, stream);
+    raft::update_device(
+      right_eig_vectors_ref.data(), right_eig_vectors_ref_h, params.k * params.n_col, stream);
+    raft::update_device(sing_vals_ref.data(), sing_vals_ref_h, params.k, stream);
+    randomized_svd(handle,
+                   raft::make_device_matrix_view<const T, uint32_t, raft::col_major>(
+                     data.data(), params.n_row, params.n_col),
+                   raft::make_device_vector_view<T, uint32_t>(sing_vals_act.data(), params.k),
+                   std::nullopt,  // U omitted
+                   std::make_optional(raft::make_device_matrix_view<T, uint32_t, raft::col_major>(
+                     right_eig_vectors_act.data(), params.k, params.n_col)),
+                   2,
+                   2);
+    randomized_svd(handle,
+                   raft::make_device_matrix_view<const T, uint32_t, raft::col_major>(
+                     data.data(), params.n_row, params.n_col),
+                   raft::make_device_vector_view<T, uint32_t>(sing_vals_act.data(), params.k),
+                   std::make_optional(raft::make_device_matrix_view<T, uint32_t, raft::col_major>(
+                     left_eig_vectors_act.data(), params.n_row, params.k)),
+                   std::nullopt,  // V omitted
+                   2,
+                   2);
+    randomized_svd(handle,
+                   raft::make_device_matrix_view<const T, uint32_t, raft::col_major>(
+                     data.data(), params.n_row, params.n_col),
+                   raft::make_device_vector_view<T, uint32_t>(sing_vals_act.data(), params.k),
+                   std::nullopt,  // U omitted
+                   std::nullopt,  // V omitted
+                   2,
+                   2);
+    handle.sync_stream(stream);
+  }
+  // NOTE(review): apiTest() is gated on the cuSOLVER version; rsvd.cuh guards on CUDART_VERSION.
+  void SetUp() override
+  {
+    int major = 0;
+    int minor = 0;
+    cusolverGetProperty(MAJOR_VERSION, &major);
+    cusolverGetProperty(MINOR_VERSION, &minor);
+    int cusolv_version = major * 1000 + minor * 10;
+    if (cusolv_version >= 11050) apiTest();
+    basicTest();
+  }
+
+ protected:
+  raft::device_resources handle;
+  cudaStream_t stream;
+
+  randomized_svdInputs<T> params;
+  rmm::device_uvector<T> data, left_eig_vectors_act, right_eig_vectors_act, sing_vals_act,
+    left_eig_vectors_ref, right_eig_vectors_ref, sing_vals_ref, reconst;
+};
+// Test cases as {tolerance, n_row, n_col, k, seed}; the fixture asserts n_row == n_col == 5.
+const std::vector<randomized_svdInputs<float>> inputsf1  = {{0.0001f, 5, 5, 2, 1234ULL}};
+const std::vector<randomized_svdInputs<double>> inputsd1 = {{0.0001, 5, 5, 2, 1234ULL}};
+
+// Float instantiation; compares the device results produced in SetUp() with the references.
+using randomized_svdTestF = randomized_svdTest<float>;
+TEST_P(randomized_svdTestF, Result)
+{
+  const raft::CompareApproxAbs<float> approx_eq(params.tolerance);
+  const int n_left  = params.n_row * params.k;
+  const int n_right = params.k * params.n_col;
+  // Singular values.
+  ASSERT_TRUE(raft::devArrMatch(sing_vals_ref.data(), sing_vals_act.data(), params.k, approx_eq));
+  // Left vectors.
+  ASSERT_TRUE(
+    raft::devArrMatch(left_eig_vectors_ref.data(), left_eig_vectors_act.data(), n_left, approx_eq));
+  // Right vectors.
+  ASSERT_TRUE(raft::devArrMatch(
+    right_eig_vectors_ref.data(), right_eig_vectors_act.data(), n_right, approx_eq));
+}
+
+// Double instantiation; compares the device results produced in SetUp() with the references.
+using randomized_svdTestD = randomized_svdTest<double>;
+TEST_P(randomized_svdTestD, Result)
+{
+  const raft::CompareApproxAbs<double> approx_eq(params.tolerance);
+  const int n_left  = params.n_row * params.k;
+  const int n_right = params.k * params.n_col;
+  // Singular values.
+  ASSERT_TRUE(raft::devArrMatch(sing_vals_ref.data(), sing_vals_act.data(), params.k, approx_eq));
+  // Left vectors.
+  ASSERT_TRUE(
+    raft::devArrMatch(left_eig_vectors_ref.data(), left_eig_vectors_act.data(), n_left, approx_eq));
+  // Right vectors.
+  ASSERT_TRUE(raft::devArrMatch(
+    right_eig_vectors_ref.data(), right_eig_vectors_act.data(), n_right, approx_eq));
+}
+
+INSTANTIATE_TEST_SUITE_P(randomized_svdTests1, randomized_svdTestF, ::testing::ValuesIn(inputsf1));
+INSTANTIATE_TEST_SUITE_P(randomized_svdTests1, randomized_svdTestD, ::testing::ValuesIn(inputsd1));
+}  // end namespace linalg
+}  // end namespace raft

From 42c9c180f2dcdb1437c45bfb0c753db49ec0a732 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Mon, 22 May 2023 22:14:59 -0400
Subject: [PATCH 75/78] Using raft::resources in rsvd (#1543)

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/raft/pull/1543
---
 cpp/include/raft/linalg/detail/rsvd.cuh | 9 +++++----
 cpp/include/raft/linalg/rsvd.cuh        | 4 ++--
 cpp/test/linalg/randomized_svd.cu       | 1 +
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/cpp/include/raft/linalg/detail/rsvd.cuh b/cpp/include/raft/linalg/detail/rsvd.cuh
index 9c2cea6b66..422b19c0e5 100644
--- a/cpp/include/raft/linalg/detail/rsvd.cuh
+++ b/cpp/include/raft/linalg/detail/rsvd.cuh
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <raft/core/resource/cublas_handle.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/core/resource/cusolver_dn_handle.hpp>
 #include <raft/linalg/eig.cuh>
 #include <raft/linalg/gemm.cuh>
@@ -38,7 +39,7 @@ namespace linalg {
 namespace detail {
 
 template <typename math_t>
-void randomized_svd(const raft::device_resources& handle,
+void randomized_svd(const raft::resources& handle,
                     const math_t* in,
                     std::size_t n_rows,
                     std::size_t n_cols,
@@ -61,8 +62,8 @@ void randomized_svd(const raft::device_resources& handle,
 #if CUDART_VERSION < 11050
   RAFT_EXPECTS(gen_U && gen_V, "not computing U or V is not supported in CUDA version < 11.5");
 #endif
-  cudaStream_t stream          = handle.get_stream();
-  cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
+  cudaStream_t stream          = resource::get_cuda_stream(handle);
+  cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle);
 
   char jobu = gen_U ? 'S' : 'N';
   char jobv = gen_V ? 'S' : 'N';
@@ -123,7 +124,7 @@ void randomized_svd(const raft::device_resources& handle,
 
   int dev_info;
   raft::update_host(&dev_info, devInfo.data_handle(), 1, stream);
-  handle.sync_stream(stream);
+  resource::sync_stream(handle);
   ASSERT(dev_info == 0, "rsvd.cuh: Invalid parameter encountered.");
 }
 
diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh
index 8037611a54..2dece5b957 100644
--- a/cpp/include/raft/linalg/rsvd.cuh
+++ b/cpp/include/raft/linalg/rsvd.cuh
@@ -814,7 +814,7 @@ void rsvd_perc_symmetric_jacobi(Args... args)
  * @param[in]  niters:  Number of iteration of power method. (2 is recommended)
  */
 template <typename math_t, typename idx_t>
-void randomized_svd(const raft::device_resources& handle,
+void randomized_svd(const raft::resources& handle,
                     raft::device_matrix_view<const math_t, idx_t, raft::col_major> in,
                     raft::device_vector_view<math_t, idx_t> S,
                     std::optional<raft::device_matrix_view<math_t, idx_t, raft::col_major>> U,
@@ -859,7 +859,7 @@ void randomized_svd(const raft::device_resources& handle,
  * Please see above for documentation of `randomized_svd`.
  */
 template <typename math_t, typename idx_t, typename opt_u_vec_t, typename opt_v_vec_t>
-void randomized_svd(const raft::device_resources& handle,
+void randomized_svd(const raft::resources& handle,
                     raft::device_matrix_view<const math_t, idx_t, raft::col_major> in,
                     raft::device_vector_view<math_t, idx_t> S,
                     opt_u_vec_t&& U,
diff --git a/cpp/test/linalg/randomized_svd.cu b/cpp/test/linalg/randomized_svd.cu
index 2d55fd7579..9e1d3df6dc 100644
--- a/cpp/test/linalg/randomized_svd.cu
+++ b/cpp/test/linalg/randomized_svd.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/device_resources.hpp>
 #include <raft/linalg/rsvd.cuh>
 #include <raft/linalg/svd.cuh>
 #include <raft/matrix/diagonal.cuh>

From 7d3bed24dce125e430627fb7c7df2d2bc8642a39 Mon Sep 17 00:00:00 2001
From: jakirkham <jakirkham@gmail.com>
Date: Wed, 31 May 2023 06:43:01 -0700
Subject: [PATCH 76/78] Require Numba 0.57.0+ (#1559)

Align with the rest of RAPIDS on these requirements. Also needed for CUDA 12 support.

Authors:
   - https://github.com/jakirkham

Approvers:
   - Corey J. Nolet (https://github.com/cjnolet)
   - Ray Douglass (https://github.com/raydouglass)
   - Bradley Dice (https://github.com/bdice)
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 dependencies.yaml                                | 2 +-
 python/raft-dask/pyproject.toml                  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index aae2aa3d15..c32596f5b6 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -39,7 +39,7 @@ dependencies:
 - libcusparse=11.7.5.86
 - nccl>=2.9.9
 - ninja
-- numba>=0.49
+- numba>=0.57
 - numpy>=1.21
 - numpydoc
 - pydata-sphinx-theme
diff --git a/dependencies.yaml b/dependencies.yaml
index c768fe0333..f58d931e42 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -274,7 +274,7 @@ dependencies:
           - dask-cuda==23.6.*
           - distributed==2023.3.2.1
           - joblib>=0.11
-          - numba>=0.49
+          - numba>=0.57
           - *numpy
           - ucx-py==0.32.*
       - output_types: conda
diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml
index ac6a35b5ab..45563e4281 100644
--- a/python/raft-dask/pyproject.toml
+++ b/python/raft-dask/pyproject.toml
@@ -38,7 +38,7 @@ dependencies = [
     "dask==2023.3.2",
     "distributed==2023.3.2.1",
     "joblib>=0.11",
-    "numba>=0.49",
+    "numba>=0.57",
     "numpy>=1.21",
     "pylibraft==23.6.*",
     "ucx-py==0.32.*",

From fc979fe6e4ec8030c07eda96c037e34e7c958b7a Mon Sep 17 00:00:00 2001
From: Mahesh Doijade <36705640+mdoijade@users.noreply.github.com>
Date: Tue, 6 Jun 2023 22:19:42 +0530
Subject: [PATCH 77/78] [HOTFIX] Fix distance metrics L2/cosine/correlation
 when X & Y are same buffer but with different shape and add unit test for
 such case. (#1571)

-- tiled_brute_force_knn may call the pairwise distance API with X and Y pointing to the same buffer but with different shapes, so assuming that X == Y implies identical shapes is incorrect.

Authors:
   - Mahesh Doijade (https://github.com/mdoijade)

Approvers:
   - Tamas Bela Feher (https://github.com/tfeher)
   - Corey J. Nolet (https://github.com/cjnolet)
---
 cpp/include/raft/distance/detail/distance.cuh |  79 ++++++++------
 cpp/test/distance/dist_correlation.cu         |  23 ++++
 cpp/test/distance/dist_cos.cu                 |  39 +++++++
 cpp/test/distance/dist_l2_exp.cu              |  40 +++++++
 cpp/test/distance/distance_base.cuh           | 102 ++++++++++++++++++
 5 files changed, 250 insertions(+), 33 deletions(-)

diff --git a/cpp/include/raft/distance/detail/distance.cuh b/cpp/include/raft/distance/detail/distance.cuh
index 7493c4e558..b6885808ce 100644
--- a/cpp/include/raft/distance/detail/distance.cuh
+++ b/cpp/include/raft/distance/detail/distance.cuh
@@ -126,9 +126,7 @@ void distance_impl(raft::resources const& handle,
                    bool is_row_major,
                    DataT)  // unused
 {
-  ASSERT(
-    !(((x != y) && (worksize < 2 * (m + n) * sizeof(AccT))) || (worksize < 2 * m * sizeof(AccT))),
-    "workspace size error");
+  ASSERT(!(worksize < 2 * (m + n) * sizeof(AccT)), "workspace size error");
   ASSERT(workspace != nullptr, "workspace is null");
 
   cudaStream_t stream = raft::resource::get_cuda_stream(handle);
@@ -137,9 +135,27 @@ void distance_impl(raft::resources const& handle,
   AccT* y_norm    = workspace;
   AccT* sq_x_norm = workspace;
   AccT* sq_y_norm = workspace;
-  if (x != y) {
+  // TODO: Column major case looks to have lower accuracy for X == Y,
+  // perhaps the use of stridedSummationKernel could be causing this,
+  // need to investigate and fix.
+  if (x == y && is_row_major) {
+    raft::linalg::reduce(x_norm,
+                         x,
+                         k,
+                         std::max(m, n),
+                         (AccT)0,
+                         is_row_major,
+                         true,
+                         stream,
+                         false,
+                         raft::identity_op(),
+                         raft::add_op());
+    sq_x_norm += std::max(m, n);
+    sq_y_norm = sq_x_norm;
+    raft::linalg::rowNorm(
+      sq_x_norm, x, k, std::max(m, n), raft::linalg::L2Norm, is_row_major, stream);
+  } else {
     y_norm += m;
-
     raft::linalg::reduce(x_norm,
                          x,
                          k,
@@ -167,21 +183,6 @@ void distance_impl(raft::resources const& handle,
     sq_y_norm = sq_x_norm + m;
     raft::linalg::rowNorm(sq_x_norm, x, k, m, raft::linalg::L2Norm, is_row_major, stream);
     raft::linalg::rowNorm(sq_y_norm, y, k, n, raft::linalg::L2Norm, is_row_major, stream);
-  } else {
-    raft::linalg::reduce(x_norm,
-                         x,
-                         k,
-                         m,
-                         (AccT)0,
-                         is_row_major,
-                         true,
-                         stream,
-                         false,
-                         raft::identity_op(),
-                         raft::add_op());
-    sq_x_norm += m;
-    sq_y_norm = sq_x_norm;
-    raft::linalg::rowNorm(sq_x_norm, x, k, m, raft::linalg::L2Norm, is_row_major, stream);
   }
 
   using OpT = ops::correlation_distance_op<DataT, AccT, IdxT>;
@@ -210,23 +211,25 @@ void distance_impl(raft::resources const& handle,
                 "OutT can be uint8_t, float, double,"
                 "if sizeof(OutT) > 1 then sizeof(AccT) == sizeof(OutT).");
 
-  ASSERT(!(((x != y) && (worksize < (m + n) * sizeof(AccT))) || (worksize < m * sizeof(AccT))),
-         "workspace size error");
+  ASSERT(!(worksize < (m + n) * sizeof(AccT)), "workspace size error");
   ASSERT(workspace != nullptr, "workspace is null");
 
   cudaStream_t stream = raft::resource::get_cuda_stream(handle);
 
   DataT* x_norm = workspace;
   DataT* y_norm = workspace;
-  if (x != y) {
+  // TODO: Column major case looks to have lower accuracy for X == Y,
+  // perhaps the use of stridedSummationKernel could be causing this,
+  // need to investigate and fix.
+  if (x == y && is_row_major) {
+    raft::linalg::rowNorm(
+      x_norm, x, k, std::max(m, n), raft::linalg::L2Norm, is_row_major, stream, raft::sqrt_op{});
+  } else {
     y_norm += m;
     raft::linalg::rowNorm(
       x_norm, x, k, m, raft::linalg::L2Norm, is_row_major, stream, raft::sqrt_op{});
     raft::linalg::rowNorm(
       y_norm, y, k, n, raft::linalg::L2Norm, is_row_major, stream, raft::sqrt_op{});
-  } else {
-    raft::linalg::rowNorm(
-      x_norm, x, k, m, raft::linalg::L2Norm, is_row_major, stream, raft::sqrt_op{});
   }
 
   ops::cosine_distance_op<DataT, AccT, IdxT> distance_op{};
@@ -453,21 +456,29 @@ void distance_impl_l2_expanded(  // NOTE: different name
                 "OutT can be uint8_t, float, double,"
                 "if sizeof(OutT) > 1 then sizeof(AccT) == sizeof(OutT).");
 
-  ASSERT(!(((x != y) && (worksize < (m + n) * sizeof(AccT))) || (worksize < m * sizeof(AccT))),
-         "workspace size error");
+  ASSERT(!(worksize < (m + n) * sizeof(AccT)), "workspace size error");
   ASSERT(workspace != nullptr, "workspace is null");
 
   DataT* x_norm = workspace;
   DataT* y_norm = workspace;
-  if (x != y) {
+  // TODO: Column major case looks to have lower accuracy for X == Y,
+  // perhaps the use of stridedSummationKernel could be causing this,
+  // need to investigate and fix.
+  if ((x == y) && is_row_major) {
+    raft::linalg::rowNorm(x_norm,
+                          x,
+                          k,
+                          std::max(m, n),
+                          raft::linalg::L2Norm,
+                          is_row_major,
+                          stream,
+                          raft::identity_op{});
+  } else {
     y_norm += m;
     raft::linalg::rowNorm(
       x_norm, x, k, m, raft::linalg::L2Norm, is_row_major, stream, raft::identity_op{});
     raft::linalg::rowNorm(
       y_norm, y, k, n, raft::linalg::L2Norm, is_row_major, stream, raft::identity_op{});
-  } else {
-    raft::linalg::rowNorm(
-      x_norm, x, k, m, raft::linalg::L2Norm, is_row_major, stream, raft::identity_op{});
   }
 
   ops::l2_exp_distance_op<DataT, AccT, IdxT> distance_op{perform_sqrt};
@@ -789,8 +800,10 @@ size_t getWorkspaceSize(const InType* x, const InType* y, Index_ m, Index_ n, In
     (distanceType == raft::distance::DistanceType::CorrelationExpanded) ? 2 : 1;
 
   if (is_allocated) {
+    // TODO: once the column-major accuracy issue is resolved, allocate std::max(m, n) instead
+    // of m + n when X == Y; until then we always allocate m + n.
     worksize += numOfBuffers * m * sizeof(AccType);
-    if (x != y) worksize += numOfBuffers * n * sizeof(AccType);
+    worksize += numOfBuffers * n * sizeof(AccType);
   }
 
   return worksize;
diff --git a/cpp/test/distance/dist_correlation.cu b/cpp/test/distance/dist_correlation.cu
index fc729dec1c..aa2866483a 100644
--- a/cpp/test/distance/dist_correlation.cu
+++ b/cpp/test/distance/dist_correlation.cu
@@ -24,6 +24,10 @@ template <typename DataType>
 class DistanceCorrelation
   : public DistanceTest<raft::distance::DistanceType::CorrelationExpanded, DataType> {};
 
+template <typename DataType>
+class DistanceCorrelationXequalY
+  : public DistanceTestSameBuffer<raft::distance::DistanceType::CorrelationExpanded, DataType> {};
+
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
   {0.001f, 1024, 32, 1024, true, 1234ULL},
@@ -44,6 +48,25 @@ TEST_P(DistanceCorrelationF, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationF, ::testing::ValuesIn(inputsf));
 
+// Float instantiation of the correlation distance test in which X and Y are the
+// same buffer.
+using DistanceCorrelationXequalYF = DistanceCorrelationXequalY<float>;
+TEST_P(DistanceCorrelationXequalYF, Result)
+{
+  const int full = params.m;
+  const int half = full / 2;
+  const raft::CompareApprox<float> approx_eq(params.tolerance);
+
+  // Result 0 is compared as a full x full matrix.
+  ASSERT_TRUE(
+    raft::devArrMatch(dist_ref[0].data(), dist[0].data(), full, full, approx_eq, stream));
+
+  // Result 1 is compared as a half x full matrix.
+  ASSERT_TRUE(
+    raft::devArrMatch(dist_ref[1].data(), dist[1].data(), half, full, approx_eq, stream));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationXequalYF, ::testing::ValuesIn(inputsf));
+
 const std::vector<DistanceInputs<double>> inputsd = {
   {0.001, 1024, 1024, 32, true, 1234ULL},
   {0.001, 1024, 32, 1024, true, 1234ULL},
diff --git a/cpp/test/distance/dist_cos.cu b/cpp/test/distance/dist_cos.cu
index 9e1cf5af17..caf55529ed 100644
--- a/cpp/test/distance/dist_cos.cu
+++ b/cpp/test/distance/dist_cos.cu
@@ -24,6 +24,10 @@ template <typename DataType>
 class DistanceExpCos : public DistanceTest<raft::distance::DistanceType::CosineExpanded, DataType> {
 };
 
+template <typename DataType>
+class DistanceExpCosXequalY
+  : public DistanceTestSameBuffer<raft::distance::DistanceType::CosineExpanded, DataType> {};
+
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 1024, 1024, 32, true, 1234ULL},
   {0.001f, 1024, 32, 1024, true, 1234ULL},
@@ -34,6 +38,18 @@ const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 32, 1024, 1024, false, 1234ULL},
   {0.003f, 1024, 1024, 1024, false, 1234ULL},
 };
+
+const std::vector<DistanceInputs<float>> inputsXeqYf = {
+  {0.01f, 1024, 1024, 32, true, 1234ULL},
+  {0.01f, 1024, 32, 1024, true, 1234ULL},
+  {0.01f, 32, 1024, 1024, true, 1234ULL},
+  {0.03f, 1024, 1024, 1024, true, 1234ULL},
+  {0.01f, 1024, 1024, 32, false, 1234ULL},
+  {0.01f, 1024, 32, 1024, false, 1234ULL},
+  {0.01f, 32, 1024, 1024, false, 1234ULL},
+  {0.03f, 1024, 1024, 1024, false, 1234ULL},
+};
+
 typedef DistanceExpCos<float> DistanceExpCosF;
 TEST_P(DistanceExpCosF, Result)
 {
@@ -44,6 +60,29 @@ TEST_P(DistanceExpCosF, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpCosF, ::testing::ValuesIn(inputsf));
 
+// Float instantiation of the expanded cosine distance test in which X and Y are
+// the same buffer.
+using DistanceExpCosXequalYF = DistanceExpCosXequalY<float>;
+TEST_P(DistanceExpCosXequalYF, Result)
+{
+  const int full = params.m;
+  const int half = full / 2;
+  const raft::CompareApprox<float> approx_eq(params.tolerance);
+
+  // Result 0 is compared as a full x full matrix.
+  ASSERT_TRUE(
+    raft::devArrMatch(dist_ref[0].data(), dist[0].data(), full, full, approx_eq, stream));
+
+  // Result 1: the halved extent is the row count for row-major input and the
+  // column count otherwise.
+  const int rows = params.isRowMajor ? half : full;
+  const int cols = params.isRowMajor ? full : half;
+
+  ASSERT_TRUE(
+    raft::devArrMatch(dist_ref[1].data(), dist[1].data(), rows, cols, approx_eq, stream));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpCosXequalYF, ::testing::ValuesIn(inputsXeqYf));
+
 const std::vector<DistanceInputs<double>> inputsd = {
   {0.001, 1024, 1024, 32, true, 1234ULL},
   {0.001, 1024, 32, 1024, true, 1234ULL},
diff --git a/cpp/test/distance/dist_l2_exp.cu b/cpp/test/distance/dist_l2_exp.cu
index 6b6a290386..7bdbb44362 100644
--- a/cpp/test/distance/dist_l2_exp.cu
+++ b/cpp/test/distance/dist_l2_exp.cu
@@ -24,6 +24,10 @@ template <typename DataType>
 class DistanceEucExpTest : public DistanceTest<raft::distance::DistanceType::L2Expanded, DataType> {
 };
 
+template <typename DataType>
+class DistanceEucExpTestXequalY
+  : public DistanceTestSameBuffer<raft::distance::DistanceType::L2Expanded, DataType> {};
+
 const std::vector<DistanceInputs<float>> inputsf = {
   {0.001f, 2048, 4096, 128, true, 1234ULL},
   {0.001f, 1024, 1024, 32, true, 1234ULL},
@@ -37,6 +41,21 @@ const std::vector<DistanceInputs<float>> inputsf = {
   {0.003f, 1024, 1024, 1024, false, 1234ULL},
   {0.003f, 1021, 1021, 1021, false, 1234ULL},
 };
+
+const std::vector<DistanceInputs<float>> inputsXeqYf = {
+  {0.01f, 2048, 4096, 128, true, 1234ULL},
+  {0.01f, 1024, 1024, 32, true, 1234ULL},
+  {0.01f, 1024, 32, 1024, true, 1234ULL},
+  {0.01f, 32, 1024, 1024, true, 1234ULL},
+  {0.03f, 1024, 1024, 1024, true, 1234ULL},
+  {0.03f, 1021, 1021, 1021, true, 1234ULL},
+  {0.01f, 1024, 1024, 32, false, 1234ULL},
+  {0.01f, 1024, 32, 1024, false, 1234ULL},
+  {0.01f, 32, 1024, 1024, false, 1234ULL},
+  {0.03f, 1024, 1024, 1024, false, 1234ULL},
+  {0.03f, 1021, 1021, 1021, false, 1234ULL},
+};
+
 typedef DistanceEucExpTest<float> DistanceEucExpTestF;
 TEST_P(DistanceEucExpTestF, Result)
 {
@@ -47,6 +66,27 @@ TEST_P(DistanceEucExpTestF, Result)
 }
 INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceEucExpTestF, ::testing::ValuesIn(inputsf));
 
+// Float instantiation of the expanded L2 distance test in which X and Y are the
+// same buffer.
+using DistanceEucExpTestXequalYF = DistanceEucExpTestXequalY<float>;
+TEST_P(DistanceEucExpTestXequalYF, Result)
+{
+  const int full = params.m;
+  const int half = full / 2;
+  const raft::CompareApprox<float> approx_eq(params.tolerance);
+
+  // Result 0 is compared as a full x full matrix.
+  ASSERT_TRUE(
+    raft::devArrMatch(dist_ref[0].data(), dist[0].data(), full, full, approx_eq, stream));
+
+  // Result 1 is compared as a half x full matrix.
+  ASSERT_TRUE(
+    raft::devArrMatch(dist_ref[1].data(), dist[1].data(), half, full, approx_eq, stream));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests,
+                        DistanceEucExpTestXequalYF,
+                        ::testing::ValuesIn(inputsXeqYf));
+
 const std::vector<DistanceInputs<double>> inputsd = {
   {0.001, 1024, 1024, 32, true, 1234ULL},
   {0.001, 1024, 32, 1024, true, 1234ULL},
diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh
index 6c7cab3f7b..20d78c7bb5 100644
--- a/cpp/test/distance/distance_base.cuh
+++ b/cpp/test/distance/distance_base.cuh
@@ -532,6 +532,108 @@ class DistanceTest : public ::testing::TestWithParam<DistanceInputs<DataType>> {
   rmm::device_uvector<DataType> x, y, dist_ref, dist, dist2;
 };
 
+/*
+ * This test suite verifies the code path taken when X and Y are the same buffer.
+ * Distance metrics that require norms (e.g. L2 expanded, cosine, correlation)
+ * take an optimized path in that case and skip the norm calculation for Y.
+ * X and Y may share a buffer even though the caller passes different
+ * dimensions for them, as happens in tiled_brute_force_knn.
+ */
+template <raft::distance::DistanceType distanceType, typename DataType>
+class DistanceTestSameBuffer : public ::testing::TestWithParam<DistanceInputs<DataType>> {
+ public:
+  using dev_vector = rmm::device_uvector<DataType>;
+  DistanceTestSameBuffer()
+    : params(::testing::TestWithParam<DistanceInputs<DataType>>::GetParam()),
+      stream(resource::get_cuda_stream(handle)),
+      x(params.m * params.k, stream),  // single input buffer, passed as both X and Y
+      dist_ref({dev_vector(params.m * params.m, stream), dev_vector(params.m * params.m, stream)}),
+      dist({dev_vector(params.m * params.m, stream), dev_vector(params.m * params.m, stream)}),
+      dist2({dev_vector(params.m * params.m, stream), dev_vector(params.m * params.m, stream)})
+  {
+  }
+
+  void SetUp() override
+  {
+    auto testInfo = testing::UnitTest::GetInstance()->current_test_info();
+    common::nvtx::range fun_scope("test::%s/%s", testInfo->test_suite_name(), testInfo->name());
+
+    raft::random::RngState r(params.seed);
+    int m               = params.m;
+    int n               = params.m;  // n mirrors params.m; params.n is not read here
+    int k               = params.k;
+    DataType metric_arg = params.metric_arg;
+    bool isRowMajor     = params.isRowMajor;
+    if (distanceType == raft::distance::DistanceType::HellingerExpanded ||
+        distanceType == raft::distance::DistanceType::JensenShannon ||
+        distanceType == raft::distance::DistanceType::KLDivergence) {
+      // Hellinger works only on positive numbers; the other two metrics share the same range
+      uniform(handle, r, x.data(), m * k, DataType(0.0), DataType(1.0));
+    } else if (distanceType == raft::distance::DistanceType::RusselRaoExpanded) {
+      uniform(handle, r, x.data(), m * k, DataType(0.0), DataType(1.0));
+      // Russel rao works on boolean values. NOTE(review): uniform() above looks redundant; confirm
+      bernoulli(handle, r, x.data(), m * k, 0.5f);
+    } else {
+      uniform(handle, r, x.data(), m * k, DataType(-1.0), DataType(1.0));
+    }
+
+    for (int i = 0; i < 2; i++) {
+      // X and Y are the same buffer in both iterations; when i = 1,
+      // different dimensions are passed for x and y (m is halved, n stays params.m).
+      m = m / (i + 1);
+      naiveDistance(dist_ref[i].data(),
+                    x.data(),
+                    x.data(),
+                    m,
+                    n,
+                    k,
+                    distanceType,
+                    isRowMajor,
+                    metric_arg,
+                    stream);
+
+      DataType threshold = -10000.f;
+
+      if (isRowMajor) {
+        distanceLauncher<distanceType, DataType, layout_c_contiguous>(handle,
+                                                                      x.data(),
+                                                                      x.data(),
+                                                                      dist[i].data(),
+                                                                      dist2[i].data(),
+                                                                      m,
+                                                                      n,
+                                                                      k,
+                                                                      params,
+                                                                      threshold,
+                                                                      metric_arg);
+
+      } else {
+        distanceLauncher<distanceType, DataType, layout_f_contiguous>(handle,
+                                                                      x.data(),
+                                                                      x.data(),
+                                                                      dist[i].data(),
+                                                                      dist2[i].data(),
+                                                                      m,
+                                                                      n,
+                                                                      k,
+                                                                      params,
+                                                                      threshold,
+                                                                      metric_arg);
+      }
+    }
+    resource::sync_stream(handle, stream);
+  }
+
+ protected:
+  raft::resources handle;
+  cudaStream_t stream;
+
+  DistanceInputs<DataType> params;
+  dev_vector x;
+  static const int N = 2;  // size of each result array; SetUp fills indices 0 and 1
+  std::array<dev_vector, N> dist_ref, dist, dist2;
+};
+
 template <raft::distance::DistanceType distanceType>
 class BigMatrixDistanceTest : public ::testing::Test {
  public:

From 4de0748aabcbe996feb0e4e4a436197575edee3d Mon Sep 17 00:00:00 2001
From: Ray Douglass <ray@raydouglass.com>
Date: Wed, 7 Jun 2023 10:41:24 -0400
Subject: [PATCH 78/78] update changelog

---
 CHANGELOG.md | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5ca5995e4..16c3ba4985 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,97 @@
+# raft 23.06.00 (7 Jun 2023)
+
+## 🚨 Breaking Changes
+
+- ivf-pq::search: fix the indexing type of the query-related mdspan arguments ([#1539](https://github.com/rapidsai/raft/pull/1539)) [@achirkin](https://github.com/achirkin)
+- Dropping Python 3.8 ([#1454](https://github.com/rapidsai/raft/pull/1454)) [@divyegala](https://github.com/divyegala)
+
+## 🐛 Bug Fixes
+
+- [HOTFIX] Fix distance metrics L2/cosine/correlation when X &amp; Y are same buffer but with different shape and add unit test for such case. ([#1571](https://github.com/rapidsai/raft/pull/1571)) [@mdoijade](https://github.com/mdoijade)
+- Using raft::resources in rsvd ([#1543](https://github.com/rapidsai/raft/pull/1543)) [@cjnolet](https://github.com/cjnolet)
+- ivf-pq::search: fix the indexing type of the query-related mdspan arguments ([#1539](https://github.com/rapidsai/raft/pull/1539)) [@achirkin](https://github.com/achirkin)
+- Check python brute-force knn inputs ([#1537](https://github.com/rapidsai/raft/pull/1537)) [@benfred](https://github.com/benfred)
+- Fix failing TiledKNNTest unittest ([#1533](https://github.com/rapidsai/raft/pull/1533)) [@benfred](https://github.com/benfred)
+- ivf-flat: fix incorrect recomputed size of the index ([#1525](https://github.com/rapidsai/raft/pull/1525)) [@achirkin](https://github.com/achirkin)
+- ivf-flat: limit the workspace size of the search via batching ([#1515](https://github.com/rapidsai/raft/pull/1515)) [@achirkin](https://github.com/achirkin)
+- Support uint64_t in CAGRA index data type ([#1514](https://github.com/rapidsai/raft/pull/1514)) [@enp1s0](https://github.com/enp1s0)
+- Workaround for cuda 12 issue in cusparse ([#1508](https://github.com/rapidsai/raft/pull/1508)) [@cjnolet](https://github.com/cjnolet)
+- Un-scale output distances ([#1499](https://github.com/rapidsai/raft/pull/1499)) [@achirkin](https://github.com/achirkin)
+- Inline get_cache_idx ([#1492](https://github.com/rapidsai/raft/pull/1492)) [@ahendriksen](https://github.com/ahendriksen)
+- Pin to scikit-build&lt;17.2 ([#1487](https://github.com/rapidsai/raft/pull/1487)) [@vyasr](https://github.com/vyasr)
+- Remove pool_size() calls from debug printouts ([#1484](https://github.com/rapidsai/raft/pull/1484)) [@tfeher](https://github.com/tfeher)
+- Add missing ext declaration for log detail::format ([#1482](https://github.com/rapidsai/raft/pull/1482)) [@tfeher](https://github.com/tfeher)
+- Remove include statements from inside namespace ([#1467](https://github.com/rapidsai/raft/pull/1467)) [@robertmaynard](https://github.com/robertmaynard)
+- Use pin_compatible to ensure that lower CTKs can be used ([#1462](https://github.com/rapidsai/raft/pull/1462)) [@vyasr](https://github.com/vyasr)
+- fix ivf_pq n_probes ([#1456](https://github.com/rapidsai/raft/pull/1456)) [@benfred](https://github.com/benfred)
+- The glog project root CMakeLists.txt is where we should build from ([#1442](https://github.com/rapidsai/raft/pull/1442)) [@robertmaynard](https://github.com/robertmaynard)
+- Add missing resource factory virtual destructor ([#1433](https://github.com/rapidsai/raft/pull/1433)) [@cjnolet](https://github.com/cjnolet)
+- Removing cuda stream view include from mdarray ([#1429](https://github.com/rapidsai/raft/pull/1429)) [@cjnolet](https://github.com/cjnolet)
+- Fix dim param for IVF-PQ wrapper in ANN bench ([#1427](https://github.com/rapidsai/raft/pull/1427)) [@tfeher](https://github.com/tfeher)
+- Remove MetricProcessor code from brute_force::knn ([#1426](https://github.com/rapidsai/raft/pull/1426)) [@benfred](https://github.com/benfred)
+- Fix is_min_close ([#1419](https://github.com/rapidsai/raft/pull/1419)) [@benfred](https://github.com/benfred)
+- Have consistent compile lines between BUILD_TESTS enabled or not ([#1401](https://github.com/rapidsai/raft/pull/1401)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix ucx-py pin in raft-dask recipe ([#1396](https://github.com/rapidsai/raft/pull/1396)) [@vyasr](https://github.com/vyasr)
+
+## 📖 Documentation
+
+- Various updates to the docs for 23.06 release ([#1538](https://github.com/rapidsai/raft/pull/1538)) [@cjnolet](https://github.com/cjnolet)
+- Rename kernel arch finding function for dispatch ([#1536](https://github.com/rapidsai/raft/pull/1536)) [@mdoijade](https://github.com/mdoijade)
+- Adding bfknn and ivf-pq python api to docs ([#1507](https://github.com/rapidsai/raft/pull/1507)) [@cjnolet](https://github.com/cjnolet)
+- Add RAPIDS cuDF as a library that supports cuda_array_interface ([#1444](https://github.com/rapidsai/raft/pull/1444)) [@miguelusque](https://github.com/miguelusque)
+
+## 🚀 New Features
+
+- IVF-PQ: manipulating individual lists ([#1298](https://github.com/rapidsai/raft/pull/1298)) [@achirkin](https://github.com/achirkin)
+- Gram matrix support for sparse input ([#1296](https://github.com/rapidsai/raft/pull/1296)) [@mfoerste4](https://github.com/mfoerste4)
+- [FEA] Add randomized svd from cusolver ([#1000](https://github.com/rapidsai/raft/pull/1000)) [@lowener](https://github.com/lowener)
+
+## 🛠️ Improvements
+
+- Require Numba 0.57.0+ ([#1559](https://github.com/rapidsai/raft/pull/1559)) [@jakirkham](https://github.com/jakirkham)
+- remove device_resources include from linalg::map ([#1540](https://github.com/rapidsai/raft/pull/1540)) [@benfred](https://github.com/benfred)
+- Learn heuristic to pick fastest select_k algorithm ([#1523](https://github.com/rapidsai/raft/pull/1523)) [@benfred](https://github.com/benfred)
+- [REVIEW] make raft::cache::Cache protected to allow overrides ([#1522](https://github.com/rapidsai/raft/pull/1522)) [@mfoerste4](https://github.com/mfoerste4)
+- [REVIEW] Fix padding assertion in sparse Gram evaluation ([#1521](https://github.com/rapidsai/raft/pull/1521)) [@mfoerste4](https://github.com/mfoerste4)
+- run docs nightly too ([#1520](https://github.com/rapidsai/raft/pull/1520)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Switch back to using primary shared-action-workflows branch ([#1519](https://github.com/rapidsai/raft/pull/1519)) [@vyasr](https://github.com/vyasr)
+- Python API for IVF-Flat serialization ([#1516](https://github.com/rapidsai/raft/pull/1516)) [@tfeher](https://github.com/tfeher)
+- Introduce sample filtering to IVFPQ index search ([#1513](https://github.com/rapidsai/raft/pull/1513)) [@alexanderguzhva](https://github.com/alexanderguzhva)
+- Migrate from raft::device_resources -&gt; raft::resources ([#1510](https://github.com/rapidsai/raft/pull/1510)) [@benfred](https://github.com/benfred)
+- Use rmm allocator in CAGRA prune ([#1503](https://github.com/rapidsai/raft/pull/1503)) [@enp1s0](https://github.com/enp1s0)
+- Update recipes to GTest version &gt;=1.13.0 ([#1501](https://github.com/rapidsai/raft/pull/1501)) [@bdice](https://github.com/bdice)
+- Remove raft/matrix/matrix.cuh includes ([#1498](https://github.com/rapidsai/raft/pull/1498)) [@benfred](https://github.com/benfred)
+- Generate dataset of select_k times ([#1497](https://github.com/rapidsai/raft/pull/1497)) [@benfred](https://github.com/benfred)
+- Re-use memory pool between benchmark runs ([#1495](https://github.com/rapidsai/raft/pull/1495)) [@benfred](https://github.com/benfred)
+- Support CUDA 12.0 for pip wheels ([#1489](https://github.com/rapidsai/raft/pull/1489)) [@divyegala](https://github.com/divyegala)
+- Update cupy dependency ([#1488](https://github.com/rapidsai/raft/pull/1488)) [@vyasr](https://github.com/vyasr)
+- Enable sccache hits from local builds ([#1478](https://github.com/rapidsai/raft/pull/1478)) [@AyodeAwe](https://github.com/AyodeAwe)
+- Build wheels using new single image workflow ([#1477](https://github.com/rapidsai/raft/pull/1477)) [@vyasr](https://github.com/vyasr)
+- Revert shared-action-workflows pin ([#1475](https://github.com/rapidsai/raft/pull/1475)) [@divyegala](https://github.com/divyegala)
+- CAGRA: Separate graph index sorting functionality from prune function ([#1471](https://github.com/rapidsai/raft/pull/1471)) [@enp1s0](https://github.com/enp1s0)
+- Add generic reduction functions and separate reductions/warp_primitives ([#1470](https://github.com/rapidsai/raft/pull/1470)) [@akifcorduk](https://github.com/akifcorduk)
+- [ENH] [FINAL] Header structure: combine all PRs into one ([#1469](https://github.com/rapidsai/raft/pull/1469)) [@ahendriksen](https://github.com/ahendriksen)
+- use `matrix::select_k` in brute_force::knn call ([#1463](https://github.com/rapidsai/raft/pull/1463)) [@benfred](https://github.com/benfred)
+- Dropping Python 3.8 ([#1454](https://github.com/rapidsai/raft/pull/1454)) [@divyegala](https://github.com/divyegala)
+- Fix linalg::map to work with non-power-of-2-sized types again ([#1453](https://github.com/rapidsai/raft/pull/1453)) [@ahendriksen](https://github.com/ahendriksen)
+- [ENH] Enable building with clang (limit strict error checking to GCC) ([#1452](https://github.com/rapidsai/raft/pull/1452)) [@ahendriksen](https://github.com/ahendriksen)
+- Remove usage of rapids-get-rapids-version-from-git ([#1436](https://github.com/rapidsai/raft/pull/1436)) [@jjacobelli](https://github.com/jjacobelli)
+- Minor Updates to Sparse Structures ([#1432](https://github.com/rapidsai/raft/pull/1432)) [@divyegala](https://github.com/divyegala)
+- Use nvtx3 includes. ([#1431](https://github.com/rapidsai/raft/pull/1431)) [@bdice](https://github.com/bdice)
+- Remove wheel pytest verbosity ([#1424](https://github.com/rapidsai/raft/pull/1424)) [@sevagh](https://github.com/sevagh)
+- Add python bindings for matrix::select_k ([#1422](https://github.com/rapidsai/raft/pull/1422)) [@benfred](https://github.com/benfred)
+- Using `raft::resources` across `raft::random` ([#1420](https://github.com/rapidsai/raft/pull/1420)) [@cjnolet](https://github.com/cjnolet)
+- Generate build metrics report for test and benchmarks ([#1414](https://github.com/rapidsai/raft/pull/1414)) [@divyegala](https://github.com/divyegala)
+- Update clang-format to 16.0.1. ([#1412](https://github.com/rapidsai/raft/pull/1412)) [@bdice](https://github.com/bdice)
+- Use ARC V2 self-hosted runners for GPU jobs ([#1410](https://github.com/rapidsai/raft/pull/1410)) [@jjacobelli](https://github.com/jjacobelli)
+- Remove uses-setup-env-vars ([#1406](https://github.com/rapidsai/raft/pull/1406)) [@vyasr](https://github.com/vyasr)
+- Resolve conflicts in auto-merger of `branch-23.06` and `branch-23.04` ([#1403](https://github.com/rapidsai/raft/pull/1403)) [@galipremsagar](https://github.com/galipremsagar)
+- Adding base header-only conda package without cuda math libs ([#1386](https://github.com/rapidsai/raft/pull/1386)) [@cjnolet](https://github.com/cjnolet)
+- Fix IVF-PQ API to use `device_vector_view` ([#1384](https://github.com/rapidsai/raft/pull/1384)) [@lowener](https://github.com/lowener)
+- Branch 23.06 merge 23.04 ([#1379](https://github.com/rapidsai/raft/pull/1379)) [@vyasr](https://github.com/vyasr)
+- Forward merge branch 23.04 into 23.06 ([#1350](https://github.com/rapidsai/raft/pull/1350)) [@cjnolet](https://github.com/cjnolet)
+- Fused L2 1-NN based on cutlass 3xTF32 / DMMA ([#1118](https://github.com/rapidsai/raft/pull/1118)) [@mdoijade](https://github.com/mdoijade)
+
 # raft 23.04.00 (6 Apr 2023)
 
 ## 🚨 Breaking Changes