From 883ed04f1b5fce77168d85925b89b6afd99365cf Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Thu, 23 Mar 2023 14:57:18 -0400 Subject: [PATCH 01/78] DOC --- .github/workflows/build.yaml | 16 +++++++------- .github/workflows/pr.yaml | 22 +++++++++---------- .github/workflows/test.yaml | 8 +++---- .../all_cuda-118_arch-x86_64.yaml | 6 ++--- cpp/CMakeLists.txt | 4 ++-- cpp/doxygen/Doxyfile | 2 +- dependencies.yaml | 6 ++--- docs/source/build.md | 4 ++-- docs/source/conf.py | 4 ++-- docs/source/developer_guide.md | 18 +++++++-------- fetch_rapids.cmake | 2 +- python/pylibraft/CMakeLists.txt | 2 +- python/pylibraft/pylibraft/__init__.py | 2 +- python/pylibraft/pyproject.toml | 6 ++--- python/raft-dask/CMakeLists.txt | 2 +- python/raft-dask/pyproject.toml | 8 +++---- python/raft-dask/raft_dask/__init__.py | 2 +- 17 files changed, 57 insertions(+), 57 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 41b6a639d8..d22af4779e 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,7 +56,7 @@ jobs: if: github.ref_type == 'branch' && github.event_name == 'push' needs: python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: branch node_type: "gpu-latest-1" @@ -65,7 +65,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -78,7 +78,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-build-raft-dask: needs: wheel-publish-pylibraft secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -101,7 +101,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 1d35611537..bf080d6ad2 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -23,41 +23,41 @@ jobs: - wheel-build-raft-dask - wheel-tests-raft-dask secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.06 checks: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.06 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06 with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.06 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: pull-request node_type: "gpu-latest-1" @@ -67,7 +67,7 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: pull-request package-name: pylibraft @@ -77,7 +77,7 @@ jobs: wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: pull-request package-name: pylibraft @@ -89,7 +89,7 @@ jobs: wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.06 with: build_type: pull-request package-name: raft_dask @@ -100,7 +100,7 @@ jobs: wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: pull-request package-name: raft_dask diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index aa7ca21b5f..f1207c3545 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -44,7 +44,7 @@ jobs: test-unittest: "python -m pytest -v ./python/pylibraft/pylibraft/test" wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 47af29d9d2..9d447116a3 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -18,7 +18,7 @@ dependencies: - cupy - cxx-compiler - cython>=0.29,<0.30 -- dask-cuda=23.04 +- dask-cuda=23.06 - dask>=2023.1.1 - distributed>=2023.1.1 - doxygen>=1.8.20 @@ -41,7 +41,7 @@ dependencies: - pytest - pytest-cov - recommonmark -- rmm=23.04 +- rmm=23.06 - scikit-build>=0.13.1 - scikit-learn - scipy @@ -49,6 +49,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.17 - ucx-proc=*=gpu -- ucx-py=0.31.* +- ucx-py=0.32.* - ucx>=1.13.0 name: all_cuda-118_arch-x86_64 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2999045a0c..840321c3fa 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -10,8 +10,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under # the License. -set(RAPIDS_VERSION "23.04") -set(RAFT_VERSION "23.04.00") +set(RAPIDS_VERSION "23.06") +set(RAFT_VERSION "23.06.00") cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) include(../fetch_rapids.cmake) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 2a92c67996..17a1e0caca 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "RAFT C++ API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "23.04" +PROJECT_NUMBER = "23.06" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/dependencies.yaml b/dependencies.yaml index 93893d07af..e920141a79 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -176,12 +176,12 @@ dependencies: - dask>=2023.1.1 - distributed>=2023.1.1 - ucx>=1.13.0 - - ucx-py=0.31.* + - ucx-py=0.32.* - ucx-proc=*=gpu - - rmm=23.04 + - rmm=23.06 - libfaiss>=1.7.1=cuda* - faiss-proc=*=cuda - - dask-cuda=23.04 + - dask-cuda=23.06 test_python: common: - output_types: [conda, requirements] diff --git a/docs/source/build.md b/docs/source/build.md index 70b07f4e81..29d0a72a37 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -265,7 +265,7 @@ When the needed [build dependencies](#build-dependencies) are already satisfied, set(RAFT_GIT_DIR ${CMAKE_CURRENT_BINARY_DIR}/raft CACHE STRING "Path to RAFT repo") ExternalProject_Add(raft GIT_REPOSITORY git@github.com:rapidsai/raft.git - GIT_TAG branch-23.04 + GIT_TAG branch-23.06 PREFIX ${RAFT_GIT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" @@ -297,7 +297,7 @@ The following `cmake` snippet enables a flexible configuration of RAFT: ```cmake -set(RAFT_VERSION "23.04") +set(RAFT_VERSION "23.06") set(RAFT_FORK "rapidsai") set(RAFT_PINNED_TAG "branch-${RAFT_VERSION}") diff --git a/docs/source/conf.py b/docs/source/conf.py index 33a8a9217a..f9054420ca 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,9 +67,9 @@ # built documents. # # The short X.Y version. -version = '23.04' +version = '23.06' # The full version, including alpha/beta/rc tags. -release = '23.04.00' +release = '23.06.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index 56100b38f7..6f57453e28 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -140,13 +140,13 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/.clang-format). #### How is the check done? -All formatting checks are done by this python script: [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) which is effectively a wrapper over `clang-format`. An error is raised if the code diverges from the format suggested by clang-format. It is expected that the developers run this script to detect and fix formatting violations before creating PR. +All formatting checks are done by this python script: [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) which is effectively a wrapper over `clang-format`. An error is raised if the code diverges from the format suggested by clang-format. It is expected that the developers run this script to detect and fix formatting violations before creating PR. ##### As part of CI -[run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) is executed as part of our `ci/checks/style.sh` CI test. If there are any formatting violations, PR author is expected to fix those to get CI passing. Steps needed to fix the formatting violations are described in the subsequent sub-section. +[run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) is executed as part of our `ci/checks/style.sh` CI test. If there are any formatting violations, PR author is expected to fix those to get CI passing. Steps needed to fix the formatting violations are described in the subsequent sub-section. ##### Manually Developers can also manually (or setup this command as part of git pre-commit hook) run this check by executing: @@ -156,10 +156,10 @@ python ./cpp/scripts/run-clang-format.py From the root of the RAFT repository. #### How to know the formatting violations? -When there are formatting errors, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) prints a `diff` command, showing where there are formatting differences. Unfortunately, unlike `flake8`, `clang-format` does NOT print descriptions of the violations, but instead directly formats the code. So, the only way currently to know about formatting differences is to run the diff command as suggested by this script against each violating source file. +When there are formatting errors, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) prints a `diff` command, showing where there are formatting differences. Unfortunately, unlike `flake8`, `clang-format` does NOT print descriptions of the violations, but instead directly formats the code. So, the only way currently to know about formatting differences is to run the diff command as suggested by this script against each violating source file. #### How to fix the formatting violations? -When there are formatting violations, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) prints at the end, the exact command that can be run by developers to fix them. This is the easiest way to fix formatting errors. [This screencast](https://asciinema.org/a/287367) shows how developers can check for formatting violations in their branches and also how to fix those, before sending out PRs. +When there are formatting violations, [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) prints at the end, the exact command that can be run by developers to fix them. This is the easiest way to fix formatting errors. [This screencast](https://asciinema.org/a/287367) shows how developers can check for formatting violations in their branches and also how to fix those, before sending out PRs. In short, to bulk-fix all the formatting violations, execute the following command: ```bash @@ -168,13 +168,13 @@ python ./cpp/scripts/run-clang-format.py -inplace From the root of the RAFT repository. #### clang-format version? -To avoid spurious code style violations we specify the exact clang-format version required, currently `11.1.0`. This is enforced by the [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/run-clang-format.py) script itself. Refer [here](../build#build-dependencies) for the list of build-time dependencies. +To avoid spurious code style violations we specify the exact clang-format version required, currently `11.1.0`. This is enforced by the [run-clang-format.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/run-clang-format.py) script itself. Refer [here](../build#build-dependencies) for the list of build-time dependencies. #### Additional scripts Along with clang, there are an include checker and copyright checker scripts for checking style, which can be performed as part of CI, as well as manually. ##### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -184,7 +184,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list ``` ##### Copyright header -[copyright.py](https://github.com/rapidsai/raft/blob/branch-23.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files +[copyright.py](https://github.com/rapidsai/raft/blob/branch-23.06/ci/checks/copyright.py) checks the Copyright header for all git-modified files Manually, you can run the following to bulk-fix the header if only the years need to be updated: ```bash @@ -198,7 +198,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-23.04/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-23.06/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 2d312bd3e5..c664fd1d9f 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. # ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.04/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.06/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake ) endif() diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index b12d0a63ea..77a2a7114e 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) -set(pylibraft_version 23.04.00) +set(pylibraft_version 23.06.00) include(../../fetch_rapids.cmake) diff --git a/python/pylibraft/pylibraft/__init__.py b/python/pylibraft/pylibraft/__init__.py index 39145085f0..aebaa4e272 100644 --- a/python/pylibraft/pylibraft/__init__.py +++ b/python/pylibraft/pylibraft/__init__.py @@ -13,4 +13,4 @@ # limitations under the License. # -__version__ = "23.04.00" +__version__ = "23.06.00" diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index 7d92fd0763..785a6df6c8 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -22,13 +22,13 @@ requires = [ "scikit-build>=0.13.1", "cmake>=3.23.1,!=3.25.0", "ninja", - "rmm==23.4.*", + "rmm==23.6.*", ] build-backend = "setuptools.build_meta" [project] name = "pylibraft" -version = "23.04.00" +version = "23.06.00" description = "RAFT: Reusable Algorithms Functions and other Tools" readme = { file = "README.md", content-type = "text/markdown" } authors = [ @@ -39,7 +39,7 @@ requires-python = ">=3.8" dependencies = [ "numpy", "cuda-python>=11.7.1,<12.0", - "rmm==23.4.*", + "rmm==23.6.*", ] classifiers = [ "Intended Audience :: Developers", diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index 8486523226..816c68e83c 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) -set(raft_dask_version 23.04.00) +set(raft_dask_version 23.06.00) include(../../fetch_rapids.cmake) diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 2fe6522f57..88ac8d80ac 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -25,7 +25,7 @@ requires = [ [project] name = "raft-dask" -version = "23.04.00" +version = "23.06.00" description = "Reusable Accelerated Functions & Tools Dask Infrastructure" readme = { file = "README.md", content-type = "text/markdown" } authors = [ @@ -37,11 +37,11 @@ dependencies = [ "numpy", "numba>=0.49", "joblib>=0.11", - "dask-cuda==23.4.*", + "dask-cuda==23.6.*", "dask>=2023.1.1", - "ucx-py==0.31.*", + "ucx-py==0.32.*", "distributed>=2023.1.1", - "pylibraft==23.4.*", + "pylibraft==23.6.*", ] classifiers = [ "Intended Audience :: Developers", diff --git a/python/raft-dask/raft_dask/__init__.py b/python/raft-dask/raft_dask/__init__.py index 4f4700df48..9582da4851 100644 --- a/python/raft-dask/raft_dask/__init__.py +++ b/python/raft-dask/raft_dask/__init__.py @@ -13,4 +13,4 @@ # limitations under the License. # -__version__ = "23.04.00" +__version__ = "23.06.00" From a88072cbe4784a25d6efef05d4de4528dd4fa5ae Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 29 Mar 2023 21:17:48 -0400 Subject: [PATCH 02/78] Update rapids version --- cpp/template/cmake/thirdparty/fetch_rapids.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake index 40ba83be9e..248f4f1af4 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/template/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. # Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "23.04") +set(RAPIDS_VERSION "23.06") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake From 5f0e66d1b18a8ce992db61add7e726ec4d5c2848 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 29 Mar 2023 21:38:40 -0400 Subject: [PATCH 03/78] Update pylibraft version --- dependencies.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index 6a07cd890d..0460e2dd81 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -278,7 +278,7 @@ dependencies: - ucx-proc=*=gpu - output_types: pyproject packages: - - pylibraft==23.4.* + - pylibraft==23.6.* test_python_common: common: - output_types: [conda, requirements, pyproject] From 3c5b8de4c791e2ab44cadbaa68acd29b8ef9dcbb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 29 Mar 2023 21:48:31 -0400 Subject: [PATCH 04/78] Run dfg --- python/raft-dask/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 0ca9e7a876..1fb5aa8f7c 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "joblib>=0.11", "numba>=0.49", "numpy>=1.21", - "pylibraft==23.4.*", + "pylibraft==23.6.*", "ucx-py==0.32.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ From 9bac6d53d32567d218b8188c88f4cdf665625fe4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 30 Mar 2023 09:40:42 -0400 Subject: [PATCH 05/78] Fix dask versions in wheel build preinstallation --- .github/workflows/pr.yaml | 4 ++-- .github/workflows/test.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index bf080d6ad2..cf8f8cd4b5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -105,7 +105,7 @@ jobs: build_type: pull-request package-name: raft_dask # Always want to test against latest dask/distributed. - test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" - test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" + test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" + test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test" test-smoketest: "python ./ci/wheel_smoke_test_raft_dask.py" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f1207c3545..ebf596c958 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -51,6 +51,6 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} package-name: raft_dask - test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" - test-before-arm64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04" + test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" + test-before-arm64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test" From 698d1dfa6e9b5f6069f88f9aad121aa394f1cc64 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 31 Mar 2023 19:12:15 -0400 Subject: [PATCH 06/78] Fix ucx-py pin in raft-dask recipe (#1396) Update the ucx-py pinning for raft-dask 23.06 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Mark Sadang (https://github.com/msadang) URL: https://github.com/rapidsai/raft/pull/1396 --- conda/recipes/raft-dask/conda_build_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index 778b187870..4f88728f4b 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -14,7 +14,7 @@ ucx_version: - ">=1.13.0,<1.15.0" ucx_py_version: - - "0.31.*" + - "0.32.*" cmake_version: - ">=3.23.1,!=3.25.0" From 9048dff15bfaa0bb9e30bb8458aa0f3380660af6 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 7 Apr 2023 08:22:33 -0400 Subject: [PATCH 07/78] Have consistent compile lines between BUILD_TESTS enabled or not (#1401) This will remove 1h from our conda CI builds since we can now re-use the cached object files between `libraft` and `libraft-tests` Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Ben Frederickson (https://github.com/benfred) - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/1401 --- cpp/CMakeLists.txt | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f4e03249d4..144f58c4d6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -70,15 +70,12 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations" ${RAFT_COMPILE_LIBRARY_DEFAULT} ) -if(BUILD_TESTS - OR BUILD_PRIMS_BENCH - OR BUILD_ANN_BENCH -) - # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs - # to have different values for the `Threads::Threads` target. Setting this flag ensures - # `Threads::Threads` is the same value in first run and subsequent runs. - set(THREADS_PREFER_PTHREAD_FLAG ON) -endif() + +# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs +# to have different values for the `Threads::Threads` target. Setting this flag ensures +# `Threads::Threads` is the same value across all builds so that cache hits occur +set(THREADS_PREFER_PTHREAD_FLAG ON) + include(CMakeDependentOption) # cmake_dependent_option( RAFT_USE_FAISS_STATIC "Build and statically link the FAISS library for From a98295b516ef58bc855177077860bab2a2a76d77 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 10 Apr 2023 11:08:35 -0700 Subject: [PATCH 08/78] Remove uses-setup-env-vars (#1406) This setting now matches the default behavior of the shared-action-workflows repo Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/raft/pull/1406 --- .github/workflows/build.yaml | 2 -- .github/workflows/pr.yaml | 2 -- 2 files changed, 4 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3c8cc4912d..bec89ab888 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -75,7 +75,6 @@ jobs: package-name: pylibraft package-dir: python/pylibraft skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF" - uses-setup-env-vars: false wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit @@ -98,7 +97,6 @@ jobs: package-name: raft_dask package-dir: python/raft-dask skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF" - uses-setup-env-vars: false wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 23834ab21c..8175b4fbc7 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -73,7 +73,6 @@ jobs: package-name: pylibraft package-dir: python/pylibraft skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF" - uses-setup-env-vars: false wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit @@ -96,7 +95,6 @@ jobs: package-dir: python/raft-dask before-wheel: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-wheelhouse" skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF" - uses-setup-env-vars: false wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit From 35c2f1c95a7da45fa5ef703ab66ef2f89e613e4d Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 13 Apr 2023 19:15:18 -0400 Subject: [PATCH 09/78] Generate build metrics report for test and benchmarks (#1414) Authors: - Divye Gala (https://github.com/divyegala) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1414 --- build.sh | 37 ++++++++++++++----- conda/recipes/libraft/build_libraft.sh | 2 +- .../recipes/libraft/build_libraft_nn_bench.sh | 2 +- conda/recipes/libraft/build_libraft_tests.sh | 2 +- 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/build.sh b/build.sh index 270c75de93..039f0ed6a5 100755 --- a/build.sh +++ b/build.sh @@ -18,8 +18,8 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn --build-metrics --incl-cache-stats --time -h" -HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--limit-bench-ann=] +VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) libraft - build the raft C++ code only. Also builds the C-wrapper library @@ -45,7 +45,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument) --cache-tool= - pass the build cache tool (eg: ccache, sccache, distcc) that will be used @@ -73,7 +73,7 @@ BUILD_PRIMS_BENCH=OFF BUILD_ANN_BENCH=OFF COMPILE_LIBRARY=OFF INSTALL_TARGET=install -BUILD_REPORT_METRICS=OFF +BUILD_REPORT_METRICS="" BUILD_REPORT_INCL_CACHE_STATS=OFF TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;NEIGHBORS_TEST;STATS_TEST;UTILS_TEST" @@ -189,6 +189,25 @@ function limitAnnBench { fi } +function buildMetrics { + # Check for multiple build-metrics options + if [[ $(echo $ARGS | { grep -Eo "\-\-build\-metrics" || true; } | wc -l ) -gt 1 ]]; then + echo "Multiple --build-metrics options were provided, please provide only one: ${ARGS}" + exit 1 + fi + # Check for build-metrics option + if [[ -n $(echo $ARGS | { grep -E "\-\-build\-metrics" || true; } ) ]]; then + # There are possible weird edge cases that may cause this regex filter to output nothing and fail silently + # the true pipe will catch any weird edge cases that may happen and will cause the program to fall back + # on the invalid option error + BUILD_REPORT_METRICS=$(echo $ARGS | sed -e 's/.*--build-metrics=//' -e 's/ .*//') + if [[ -n ${BUILD_REPORT_METRICS} ]]; then + # Remove the full BUILD_REPORT_METRICS argument from list of args so that it passes validArgs function + ARGS=${ARGS//--build-metrics=$BUILD_REPORT_METRICS/} + fi + fi +} + if hasArg -h || hasArg --help; then echo "${HELP}" exit 0 @@ -201,6 +220,7 @@ if (( ${NUMARGS} != 0 )); then limitTests limitBench limitAnnBench + buildMetrics for a in ${ARGS}; do if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then echo "Invalid option: ${a}" @@ -339,9 +359,6 @@ fi if hasArg clean; then CLEAN=1 fi -if hasArg --build-metrics; then - BUILD_REPORT_METRICS=ON -fi if hasArg --incl-cache-stats; then BUILD_REPORT_INCL_CACHE_STATS=ON fi @@ -422,7 +439,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has compile_end=$(date +%s) compile_total=$(( compile_end - compile_start )) - if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIBRAFT_BUILD_DIR}/.ninja_log" ]]; then + if [[ -n "$BUILD_REPORT_METRICS" && -f "${LIBRAFT_BUILD_DIR}/.ninja_log" ]]; then if ! rapids-build-metrics-reporter.py 2> /dev/null && [ ! -f rapids-build-metrics-reporter.py ]; then echo "Downloading rapids-build-metrics-reporter.py" curl -sO https://raw.githubusercontent.com/rapidsai/build-metrics-reporter/v1/rapids-build-metrics-reporter.py @@ -454,13 +471,13 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has MSG="${MSG}
libraft.so size: $LIBRAFT_FS" fi BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIBRAFT_BUILD_DIR}"} - echo "The HTML report can be found at [${BMR_DIR}/ninja_log.html]. In CI, this report" + echo "The HTML report can be found at [${BMR_DIR}/${BUILD_REPORT_METRICS}.html]. In CI, this report" echo "will also be uploaded to the appropriate subdirectory of https://downloads.rapids.ai/ci/raft/, and" echo "the entire URL can be found in \"conda-cpp-build\" runs under the task \"Upload additional artifacts\"" mkdir -p ${BMR_DIR} MSG_OUTFILE="$(mktemp)" echo "$MSG" > "${MSG_OUTFILE}" - PATH=".:$PATH" python rapids-build-metrics-reporter.py ${LIBRAFT_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html + PATH=".:$PATH" python rapids-build-metrics-reporter.py ${LIBRAFT_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/${BUILD_REPORT_METRICS}.html cp ${LIBRAFT_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log fi fi diff --git a/conda/recipes/libraft/build_libraft.sh b/conda/recipes/libraft/build_libraft.sh index 2bf9b428cb..7d4173e8bb 100644 --- a/conda/recipes/libraft/build_libraft.sh +++ b/conda/recipes/libraft/build_libraft.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft --allgpuarch --compile-lib --build-metrics --incl-cache-stats --no-nvtx +./build.sh libraft --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_nn_bench.sh b/conda/recipes/libraft/build_libraft_nn_bench.sh index dc6250f0f4..00078792a1 100644 --- a/conda/recipes/libraft/build_libraft_nn_bench.sh +++ b/conda/recipes/libraft/build_libraft_nn_bench.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2023, NVIDIA CORPORATION. -./build.sh tests bench-ann --allgpuarch --no-nvtx +./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh index cc28f93fb8..05a2b59eb0 100644 --- a/conda/recipes/libraft/build_libraft_tests.sh +++ b/conda/recipes/libraft/build_libraft_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh tests bench-prims --allgpuarch --no-nvtx +./build.sh tests bench-prims --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats cmake --install cpp/build --component testing From 7c5b63845b929da5f35fd7711d5547f726ed7dbe Mon Sep 17 00:00:00 2001 From: Micka Date: Fri, 14 Apr 2023 01:16:56 +0200 Subject: [PATCH 10/78] Fix IVF-PQ API to use `device_vector_view` (#1384) This PR mainly intends to replace `device_matrix_view` for `ivf_pq::extend` to `device_vector_view`. There are also a few updates to the documentation to reflect the current API. The order of the arguments in the API is not touched. Authors: - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Victor Lafargue (https://github.com/viclafargue) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1384 --- cpp/include/raft/neighbors/ivf_flat.cuh | 23 ++++---- cpp/include/raft/neighbors/ivf_pq.cuh | 8 +-- .../raft/neighbors/specializations/ivf_pq.cuh | 34 +++++------ cpp/include/raft_runtime/neighbors/ivf_pq.hpp | 40 ++++++------- cpp/src/neighbors/ivfpq_build.cu | 58 +++++++++---------- .../ivfpq_extend_float_int64_t.cu | 22 +++---- .../ivfpq_extend_int8_t_int64_t.cu | 22 +++---- .../ivfpq_extend_uint8_t_int64_t.cu | 22 +++---- cpp/test/neighbors/ann_ivf_pq.cuh | 5 +- .../pylibraft/neighbors/ivf_flat/ivf_flat.pyx | 2 +- .../neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 12 ++-- .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 22 +++++-- 12 files changed, 140 insertions(+), 130 deletions(-) diff --git a/cpp/include/raft/neighbors/ivf_flat.cuh b/cpp/include/raft/neighbors/ivf_flat.cuh index c573676504..f12062f851 100644 --- a/cpp/include/raft/neighbors/ivf_flat.cuh +++ b/cpp/include/raft/neighbors/ivf_flat.cuh @@ -94,12 +94,11 @@ auto build(raft::device_resources const& handle, * // use default search parameters * ivf_flat::search_params search_params; * // search K nearest neighbours for each of the N queries - * ivf_flat::search(handle, index, queries, out_inds, out_dists, search_params, k); + * ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists); * @endcode * * @tparam value_t data element type * @tparam idx_t type of the indices in the source dataset - * @tparam int_t precision / type of integral arguments * * @param[in] handle * @param[in] params configure the index building @@ -139,13 +138,11 @@ auto build(raft::device_resources const& handle, * // use default search parameters * ivf_flat::search_params search_params; * // search K nearest neighbours for each of the N queries - * ivf_flat::search(handle, index, queries, out_inds, out_dists, search_params, k); + * ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists); * @endcode * * @tparam value_t data element type * @tparam idx_t type of the indices in the source dataset - * @tparam int_t precision / type of integral arguments - * @tparam matrix_idx_t matrix indexing type * * @param[in] handle * @param[in] params configure the index building @@ -232,7 +229,8 @@ auto extend(raft::device_resources const& handle, * // train the index from a [N, D] dataset * auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset); * // fill the index with the data - * auto index = ivf_flat::extend(handle, index_empty, dataset); + * std::optional> no_op = std::nullopt; + * auto index = ivf_flat::extend(handle, index_empty, no_op, dataset); * @endcode * * @tparam value_t data element type @@ -240,7 +238,7 @@ auto extend(raft::device_resources const& handle, * * @param[in] handle * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, index.dim()] - * @param[in] new_indices optional raft::device_matrix_view to a vector of indices [n_rows]. + * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows]. * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` * here to imply a continuous range `[0...n_rows)`. * @param[in] orig_index original index @@ -314,7 +312,7 @@ void extend(raft::device_resources const& handle, * index_params.add_data_on_build = false; // don't populate index on build * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training * // train the index from a [N, D] dataset - * auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset); + * auto index_empty = ivf_flat::build(handle, index_params, dataset); * // fill the index with the data * std::optional> no_op = std::nullopt; * ivf_flat::extend(handle, dataset, no_opt, &index_empty); @@ -325,7 +323,7 @@ void extend(raft::device_resources const& handle, * * @param[in] handle * @param[in] new_vectors raft::device_matrix_view to a row-major matrix [n_rows, index.dim()] - * @param[in] new_indices optional raft::device_matrix_view to a vector of indices [n_rows]. + * @param[in] new_indices optional raft::device_vector_view to a vector of indices [n_rows]. * If the original index is empty (`orig_index.size() == 0`), you can pass `std::nullopt` * here to imply a continuous range `[0...n_rows)`. * @param[inout] index pointer to index, to be overwritten in-place @@ -422,15 +420,14 @@ void search(raft::device_resources const& handle, * ivf_flat::search_params search_params; * // Use the same allocator across multiple searches to reduce the number of * // cuda memory allocations - * ivf_flat::search(handle, index, queries1, out_inds1, out_dists1, search_params, K); - * ivf_flat::search(handle, index, queries2, out_inds2, out_dists2, search_params, K); - * ivf_flat::search(handle, index, queries3, out_inds3, out_dists3, search_params, K); + * ivf_flat::search(handle, search_params, index, queries1, out_inds1, out_dists1); + * ivf_flat::search(handle, search_params, index, queries2, out_inds2, out_dists2); + * ivf_flat::search(handle, search_params, index, queries3, out_inds3, out_dists3); * ... * @endcode * * @tparam value_t data element type * @tparam idx_t type of the indices - * @tparam int_t precision / type of integral arguments * * @param[in] handle * @param[in] params configure the search diff --git a/cpp/include/raft/neighbors/ivf_pq.cuh b/cpp/include/raft/neighbors/ivf_pq.cuh index 4a12ca72a4..934643e0af 100644 --- a/cpp/include/raft/neighbors/ivf_pq.cuh +++ b/cpp/include/raft/neighbors/ivf_pq.cuh @@ -69,7 +69,7 @@ index build(raft::device_resources const& handle, * * @param[in] handle * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] - * @param[in] new_indices a device matrix view to a vector of indices [n_rows]. + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` * here to imply a continuous range `[0...n_rows)`. * @param[inout] idx @@ -77,7 +77,7 @@ index build(raft::device_resources const& handle, template index extend(raft::device_resources const& handle, raft::device_matrix_view new_vectors, - std::optional> new_indices, + std::optional> new_indices, const index& idx) { ASSERT(new_vectors.extent(1) == idx.dim(), @@ -104,7 +104,7 @@ index extend(raft::device_resources const& handle, * * @param[in] handle * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] - * @param[in] new_indices a device matrix view to a vector of indices [n_rows]. + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` * here to imply a continuous range `[0...n_rows)`. * @param[inout] idx @@ -112,7 +112,7 @@ index extend(raft::device_resources const& handle, template void extend(raft::device_resources const& handle, raft::device_matrix_view new_vectors, - std::optional> new_indices, + std::optional> new_indices, index* idx) { ASSERT(new_vectors.extent(1) == idx->dim(), diff --git a/cpp/include/raft/neighbors/specializations/ivf_pq.cuh b/cpp/include/raft/neighbors/specializations/ivf_pq.cuh index 55a7cd5858..9209f5095d 100644 --- a/cpp/include/raft/neighbors/specializations/ivf_pq.cuh +++ b/cpp/include/raft/neighbors/specializations/ivf_pq.cuh @@ -35,24 +35,22 @@ namespace raft::neighbors::ivf_pq { // We define overloads for build and extend with void return type. This is used in the Cython // wrappers, where exception handling is not compatible with return type that has nontrivial // constructor. -#define RAFT_DECL_BUILD_EXTEND(T, IdxT) \ - extern template auto build(raft::device_resources const&, \ - const raft::neighbors::ivf_pq::index_params&, \ - raft::device_matrix_view) \ - ->raft::neighbors::ivf_pq::index; \ - \ - extern template auto extend( \ - raft::device_resources const&, \ - raft::device_matrix_view, \ - std::optional>, \ - const raft::neighbors::ivf_pq::index&) \ - ->raft::neighbors::ivf_pq::index; \ - \ - extern template void extend( \ - raft::device_resources const&, \ - raft::device_matrix_view, \ - std::optional>, \ - raft::neighbors::ivf_pq::index*); +#define RAFT_DECL_BUILD_EXTEND(T, IdxT) \ + extern template auto build(raft::device_resources const&, \ + const raft::neighbors::ivf_pq::index_params&, \ + raft::device_matrix_view) \ + ->raft::neighbors::ivf_pq::index; \ + \ + extern template auto extend(raft::device_resources const&, \ + raft::device_matrix_view, \ + std::optional>, \ + const raft::neighbors::ivf_pq::index&) \ + ->raft::neighbors::ivf_pq::index; \ + \ + extern template void extend(raft::device_resources const&, \ + raft::device_matrix_view, \ + std::optional>, \ + raft::neighbors::ivf_pq::index*); RAFT_DECL_BUILD_EXTEND(float, int64_t) RAFT_DECL_BUILD_EXTEND(int8_t, int64_t) diff --git a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp index fb22d7657e..17260b0ded 100644 --- a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp +++ b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp @@ -23,26 +23,26 @@ namespace raft::runtime::neighbors::ivf_pq { // We define overloads for build and extend with void return type. This is used in the Cython // wrappers, where exception handling is not compatible with return type that has nontrivial // constructor. -#define RAFT_DECL_BUILD_EXTEND(T, IdxT) \ - [[nodiscard]] raft::neighbors::ivf_pq::index build( \ - raft::device_resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset); \ - \ - void build(raft::device_resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset, \ - raft::neighbors::ivf_pq::index* idx); \ - \ - [[nodiscard]] raft::neighbors::ivf_pq::index extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const raft::neighbors::ivf_pq::index& idx); \ - \ - void extend(raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ +#define RAFT_DECL_BUILD_EXTEND(T, IdxT) \ + [[nodiscard]] raft::neighbors::ivf_pq::index build( \ + raft::device_resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset); \ + \ + void build(raft::device_resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset, \ + raft::neighbors::ivf_pq::index* idx); \ + \ + [[nodiscard]] raft::neighbors::ivf_pq::index extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const raft::neighbors::ivf_pq::index& idx); \ + \ + void extend(raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ raft::neighbors::ivf_pq::index* idx); RAFT_DECL_BUILD_EXTEND(float, int64_t); diff --git a/cpp/src/neighbors/ivfpq_build.cu b/cpp/src/neighbors/ivfpq_build.cu index 8759ca2587..7f91e34969 100644 --- a/cpp/src/neighbors/ivfpq_build.cu +++ b/cpp/src/neighbors/ivfpq_build.cu @@ -20,35 +20,35 @@ namespace raft::runtime::neighbors::ivf_pq { -#define RAFT_INST_BUILD_EXTEND(T, IdxT) \ - raft::neighbors::ivf_pq::index build( \ - raft::device_resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset) \ - { \ - return raft::neighbors::ivf_pq::build(handle, params, dataset); \ - } \ - void build(raft::device_resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset, \ - raft::neighbors::ivf_pq::index* idx) \ - { \ - *idx = raft::neighbors::ivf_pq::build(handle, params, dataset); \ - } \ - raft::neighbors::ivf_pq::index extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const raft::neighbors::ivf_pq::index& idx) \ - { \ - return raft::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, idx); \ - } \ - void extend(raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - raft::neighbors::ivf_pq::index* idx) \ - { \ - raft::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, idx); \ +#define RAFT_INST_BUILD_EXTEND(T, IdxT) \ + raft::neighbors::ivf_pq::index build( \ + raft::device_resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset) \ + { \ + return raft::neighbors::ivf_pq::build(handle, params, dataset); \ + } \ + void build(raft::device_resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset, \ + raft::neighbors::ivf_pq::index* idx) \ + { \ + *idx = raft::neighbors::ivf_pq::build(handle, params, dataset); \ + } \ + raft::neighbors::ivf_pq::index extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const raft::neighbors::ivf_pq::index& idx) \ + { \ + return raft::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, idx); \ + } \ + void extend(raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + raft::neighbors::ivf_pq::index* idx) \ + { \ + raft::neighbors::ivf_pq::extend(handle, new_vectors, new_indices, idx); \ } RAFT_INST_BUILD_EXTEND(float, int64_t); diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu index 4cc616f32d..584bbfc45c 100644 --- a/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu +++ b/cpp/src/neighbors/specializations/ivfpq_extend_float_int64_t.cu @@ -19,17 +19,17 @@ namespace raft::neighbors::ivf_pq { -#define RAFT_MAKE_INSTANCE(T, IdxT) \ - template auto extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const index& idx) \ - ->index; \ - template void extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ +#define RAFT_MAKE_INSTANCE(T, IdxT) \ + template auto extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const index& idx) \ + ->index; \ + template void extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ index* idx); RAFT_MAKE_INSTANCE(float, int64_t); diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu index a3117aae0f..00311a77e4 100644 --- a/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu +++ b/cpp/src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu @@ -19,17 +19,17 @@ namespace raft::neighbors::ivf_pq { -#define RAFT_MAKE_INSTANCE(T, IdxT) \ - template auto extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const index& idx) \ - ->index; \ - template void extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ +#define RAFT_MAKE_INSTANCE(T, IdxT) \ + template auto extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const index& idx) \ + ->index; \ + template void extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ index* idx); RAFT_MAKE_INSTANCE(int8_t, int64_t); diff --git a/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu index a5e3d68569..11524886f0 100644 --- a/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu @@ -19,17 +19,17 @@ namespace raft::neighbors::ivf_pq { -#define RAFT_MAKE_INSTANCE(T, IdxT) \ - template auto extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const index& idx) \ - ->index; \ - template void extend( \ - raft::device_resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ +#define RAFT_MAKE_INSTANCE(T, IdxT) \ + template auto extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const index& idx) \ + ->index; \ + template void extend( \ + raft::device_resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ index* idx); RAFT_MAKE_INSTANCE(uint8_t, int64_t); diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index c331081314..c69829821a 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -210,13 +210,12 @@ class ivf_pq_test : public ::testing::TestWithParam { auto idx = ivf_pq::build(handle_, ipams, database_view); auto vecs_2_view = raft::make_device_matrix_view(vecs_2, size_2, ps.dim); - auto inds_2_view = raft::make_device_matrix_view(inds_2, size_2, 1); + auto inds_2_view = raft::make_device_vector_view(inds_2, size_2); ivf_pq::extend(handle_, vecs_2_view, inds_2_view, &idx); auto vecs_1_view = raft::make_device_matrix_view(vecs_1, size_1, ps.dim); - auto inds_1_view = - raft::make_device_matrix_view(inds_1, size_1, 1); + auto inds_1_view = raft::make_device_vector_view(inds_1, size_1); ivf_pq::extend(handle_, vecs_1_view, inds_1_view, &idx); return idx; } diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx index db279ad2db..352376fe17 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx @@ -427,7 +427,7 @@ def extend(Index index, new_vectors, new_indices, handle=None): Trained ivf_flat object. new_vectors : CUDA array interface compliant matrix shape (n_samples, dim) Supported dtype [float, int8, uint8] - new_indices : CUDA array interface compliant matrix shape (n_samples, dim) + new_indices : CUDA array interface compliant vector shape (n_samples) Supported dtype [int64] {handle_docstring} diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index d04d833f3b..531c2428e9 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -29,7 +29,11 @@ from libcpp.string cimport string from rmm._lib.memory_resource cimport device_memory_resource -from pylibraft.common.cpp.mdspan cimport device_matrix_view, row_major +from pylibraft.common.cpp.mdspan cimport ( + device_matrix_view, + device_vector_view, + row_major, +) from pylibraft.common.handle cimport device_resources from pylibraft.common.optional cimport optional from pylibraft.distance.distance_type cimport DistanceType @@ -126,19 +130,19 @@ cdef extern from "raft_runtime/neighbors/ivf_pq.hpp" \ cdef void extend( const device_resources& handle, device_matrix_view[float, int64_t, row_major] new_vectors, - optional[device_matrix_view[int64_t, int64_t, row_major]] new_indices, + optional[device_vector_view[int64_t, int64_t]] new_indices, index[int64_t]* index) except + cdef void extend( const device_resources& handle, device_matrix_view[int8_t, int64_t, row_major] new_vectors, - optional[device_matrix_view[int64_t, int64_t, row_major]] new_indices, + optional[device_vector_view[int64_t, int64_t]] new_indices, index[int64_t]* index) except + cdef void extend( const device_resources& handle, device_matrix_view[uint8_t, int64_t, row_major] new_vectors, - optional[device_matrix_view[int64_t, int64_t, row_major]] new_indices, + optional[device_vector_view[int64_t, int64_t]] new_indices, index[int64_t]* index) except + cdef void search( diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 1906c569f6..b89e5dd44d 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -51,10 +51,16 @@ from rmm._lib.memory_resource cimport ( cimport pylibraft.neighbors.ivf_flat.cpp.c_ivf_flat as c_ivf_flat cimport pylibraft.neighbors.ivf_pq.cpp.c_ivf_pq as c_ivf_pq +from pylibraft.common.optional cimport make_optional, optional from pylibraft.neighbors.common import _check_input_array, _get_metric -from pylibraft.common.cpp.mdspan cimport device_matrix_view, row_major +from pylibraft.common.cpp.mdspan cimport ( + device_matrix_view, + device_vector_view, + make_device_vector_view, + row_major, +) from pylibraft.common.mdspan cimport ( get_dmv_float, get_dmv_int8, @@ -416,7 +422,7 @@ def extend(Index index, new_vectors, new_indices, handle=None): Trained ivf_pq object. new_vectors : array interface compliant matrix shape (n_samples, dim) Supported dtype [float, int8, uint8] - new_indices : array interface compliant matrix shape (n_samples, dim) + new_indices : array interface compliant vector shape (n_samples) Supported dtype [int64] {handle_docstring} @@ -472,6 +478,7 @@ def extend(Index index, new_vectors, new_indices, handle=None): vecs_cai = wrap_array(new_vectors) vecs_dt = vecs_cai.dtype + cdef optional[device_vector_view[int64_t, int64_t]] new_indices_opt cdef int64_t n_rows = vecs_cai.shape[0] cdef uint32_t dim = vecs_cai.shape[1] @@ -484,23 +491,28 @@ def extend(Index index, new_vectors, new_indices, handle=None): if len(idx_cai.shape)!=1: raise ValueError("Indices array is expected to be 1D") + if index.index.size() > 0: + new_indices_opt = make_device_vector_view( + idx_cai.data, + idx_cai.shape[0]) + if vecs_dt == np.float32: with cuda_interruptible(): c_ivf_pq.extend(deref(handle_), get_dmv_float(vecs_cai, check_shape=True), - make_optional_view_int64(get_dmv_int64(idx_cai, check_shape=False)), # noqa: E501 + new_indices_opt, index.index) elif vecs_dt == np.int8: with cuda_interruptible(): c_ivf_pq.extend(deref(handle_), get_dmv_int8(vecs_cai, check_shape=True), - make_optional_view_int64(get_dmv_int64(idx_cai, check_shape=False)), # noqa: E501 + new_indices_opt, index.index) elif vecs_dt == np.uint8: with cuda_interruptible(): c_ivf_pq.extend(deref(handle_), get_dmv_uint8(vecs_cai, check_shape=True), - make_optional_view_int64(get_dmv_int64(idx_cai, check_shape=False)), # noqa: E501 + new_indices_opt, index.index) else: raise TypeError("query dtype %s not supported" % vecs_dt) From c950854af7cb21e63cf6d161d4d04970a7ebef3b Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 13 Apr 2023 19:44:31 -0400 Subject: [PATCH 11/78] Adding base header-only conda package without cuda math libs (#1386) cc @MatthiasKohl @bdice Making sure CI agrees w/ this change. @MatthiasKohl, if CI succeeds here let's try to plug the resulting conda packages into a cugraph-ops PR to make sure cugraph-ops CI is happy as well. Authors: - Corey J. Nolet (https://github.com/cjnolet) - Robert Maynard (https://github.com/robertmaynard) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/1386 --- build.sh | 2 +- conda/recipes/libraft/meta.yaml | 51 ++++++--------- cpp/CMakeLists.txt | 63 ++++++++++++++----- cpp/bench/ann/CMakeLists.txt | 1 + .../raft/linalg/detail/cublas_wrappers.hpp | 8 ++- .../detail/modularity_maximization.hpp | 18 ------ cpp/test/CMakeLists.txt | 3 +- docs/source/build.md | 24 ++++--- 8 files changed, 89 insertions(+), 81 deletions(-) diff --git a/build.sh b/build.sh index 039f0ed6a5..ab904abdad 100755 --- a/build.sh +++ b/build.sh @@ -522,7 +522,7 @@ fi # Initiate build for example RAFT application template (if needed) if hasArg template; then - pushd cpp/template + pushd ${REPODIR}/cpp/template ./build.sh popd fi diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml index ccd7314484..8ec9cc10c6 100644 --- a/conda/recipes/libraft/meta.yaml +++ b/conda/recipes/libraft/meta.yaml @@ -16,7 +16,7 @@ source: git_url: ../../.. outputs: - - name: libraft-headers + - name: libraft-headers-only version: {{ version }} script: build_libraft_headers.sh build: @@ -50,20 +50,26 @@ outputs: - ninja - sysroot_{{ target_platform }} {{ sysroot_version }} host: - - cuda-profiler-api {{ cuda_profiler_api_host_version }} - - cudatoolkit ={{ cuda_version }} - - libcublas {{ libcublas_host_version }} - - libcublas-dev {{ libcublas_host_version }} - - libcurand {{ libcurand_host_version }} - - libcurand-dev {{ libcurand_host_version }} - - libcusolver {{ libcusolver_host_version }} - - libcusolver-dev {{ libcusolver_host_version }} - - libcusparse {{ libcusparse_host_version }} - - libcusparse-dev {{ libcusparse_host_version }} - librmm ={{ minor_version }} + - cudatoolkit {{ cuda_version }} + about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: libraft-headers-only library + - name: libraft-headers + version: {{ version }} + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + - {{ compiler('cuda') }} + - librmm + requirements: run: - - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} + - {{ pin_subpackage('libraft-headers-only', exact=True) }} - cuda-profiler-api {{ cuda_profiler_api_run_version }} + - cudatoolkit {{ cuda_version }} + - librmm ={{ minor_version }} - libcublas {{ libcublas_run_version }} - libcublas-dev {{ libcublas_run_version }} - libcurand {{ libcurand_run_version }} @@ -72,7 +78,6 @@ outputs: - libcusolver-dev {{ libcusolver_run_version }} - libcusparse {{ libcusparse_run_version }} - libcusparse-dev {{ libcusparse_run_version }} - - librmm ={{ minor_version }} about: home: https://rapids.ai/ license: Apache-2.0 @@ -130,7 +135,6 @@ outputs: - sysroot_{{ target_platform }} {{ sysroot_version }} host: - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_subpackage('libraft-headers', exact=True) }} - cuda-profiler-api {{ cuda_profiler_api_host_version }} - gmock {{ gtest_version }} - gtest {{ gtest_version }} @@ -144,7 +148,6 @@ outputs: - libcusparse-dev {{ libcusparse_host_version }} run: - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_subpackage('libraft-headers', exact=True) }} - gmock {{ gtest_version }} - gtest {{ gtest_version }} about: @@ -170,19 +173,10 @@ outputs: - sysroot_{{ target_platform }} {{ sysroot_version }} host: - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_subpackage('libraft-headers', exact=True) }} - - cuda-profiler-api {{ cuda_profiler_api_host_version }} - libcublas {{ libcublas_host_version }} - libcublas-dev {{ libcublas_host_version }} - - libcurand {{ libcurand_host_version }} - - libcurand-dev {{ libcurand_host_version }} - - libcusolver {{ libcusolver_host_version }} - - libcusolver-dev {{ libcusolver_host_version }} - - libcusparse {{ libcusparse_host_version }} - - libcusparse-dev {{ libcusparse_host_version }} run: - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_subpackage('libraft-headers', exact=True) }} about: home: https://rapids.ai/ license: Apache-2.0 @@ -206,23 +200,14 @@ outputs: - sysroot_{{ target_platform }} {{ sysroot_version }} host: - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_subpackage('libraft-headers', exact=True) }} - - cuda-profiler-api {{ cuda_profiler_api_host_version }} - libcublas {{ libcublas_host_version }} - libcublas-dev {{ libcublas_host_version }} - - libcurand {{ libcurand_host_version }} - - libcurand-dev {{ libcurand_host_version }} - - libcusolver {{ libcusolver_host_version }} - - libcusolver-dev {{ libcusolver_host_version }} - - libcusparse {{ libcusparse_host_version }} - - libcusparse-dev {{ libcusparse_host_version }} - glog {{ glog_version }} - nlohmann_json {{ nlohmann_json_version }} - libfaiss>=1.7.1 - faiss-proc=*=cuda run: - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_subpackage('libraft-headers', exact=True) }} - glog {{ glog_version }} - faiss-proc=*=cuda - libfaiss {{ faiss_version }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 144f58c4d6..6461492169 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -179,17 +179,7 @@ target_include_directories( ) # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. -target_link_libraries( - raft - INTERFACE rmm::rmm - cuco::cuco - nvidia::cutlass::cutlass - CUDA::cublas${_ctk_static_suffix} - CUDA::curand${_ctk_static_suffix} - CUDA::cusolver${_ctk_static_suffix} - CUDA::cusparse${_ctk_static_suffix} - raft::Thrust -) +target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) target_compile_features(raft INTERFACE cxx_std_17 $) target_compile_options( @@ -197,6 +187,15 @@ target_compile_options( --expt-relaxed-constexpr> ) +set(RAFT_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix}) +set(RAFT_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix}) +set(RAFT_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix}) +set(RAFT_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix}) + +set(RAFT_CTK_MATH_DEPENDENCIES ${RAFT_CUBLAS_DEPENDENCY} ${RAFT_CUSOLVER_DEPENDENCY} + ${RAFT_CUSPARSE_DEPENDENCY} ${RAFT_CURAND_DEPENDENCY} +) + # Endian detection include(TestBigEndian) test_big_endian(BIG_ENDIAN) @@ -454,7 +453,13 @@ if(RAFT_COMPILE_LIBRARY) INTERFACE_POSITION_INDEPENDENT_CODE ON ) - target_link_libraries(raft_lib PUBLIC raft::raft $) + target_link_libraries( + raft_lib + PUBLIC raft::raft + ${RAFT_CTK_MATH_DEPENDENCIES} # TODO: Once `raft::resources` is used everywhere, this + # will just be cublas + $ + ) target_compile_options( raft_lib PRIVATE "$<$:${RAFT_CXX_FLAGS}>" "$<$:${RAFT_CUDA_FLAGS}>" @@ -606,13 +611,39 @@ if(TARGET raft_lib) list(APPEND raft_export_sets raft-compiled-lib-exports) endif() +string( + APPEND + code_string + [=[ + option(RAFT_ENABLE_CUSOLVER_DEPENDENCY "Enable cusolver dependency" ON) + option(RAFT_ENABLE_CUBLAS_DEPENDENCY "Enable cublas dependency" ON) + option(RAFT_ENABLE_CURAND_DEPENDENCY "Enable curand dependency" ON) + option(RAFT_ENABLE_CUSPARSE_DEPENDENCY "Enable cusparse dependency" ON) + +mark_as_advanced(RAFT_ENABLE_CUSOLVER_DEPENDENCY) +mark_as_advanced(RAFT_ENABLE_CUBLAS_DEPENDENCY) +mark_as_advanced(RAFT_ENABLE_CURAND_DEPENDENCY) +mark_as_advanced(RAFT_ENABLE_CUSPARSE_DEPENDENCY) + +target_link_libraries(raft::raft INTERFACE + $<$:${RAFT_CUSOLVER_DEPENDENCY}> + $<$:${RAFT_CUBLAS_DEPENDENCY}> + $<$:${RAFT_CUSPARSE_DEPENDENCY}> + $<$:${RAFT_CURAND_DEPENDENCY}> +) +]=] +) + +# Use `rapids_export` for 22.04 as it will have COMPONENT support rapids_export( INSTALL raft EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} GLOBAL_TARGETS raft compiled distributed - NAMESPACE raft:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string + NAMESPACE raft:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string ) # ################################################################################################## @@ -622,8 +653,10 @@ rapids_export( EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} - GLOBAL_TARGETS raft - compiled distributed DOCUMENTATION doc_string NAMESPACE raft:: FINAL_CODE_BLOCK code_string + GLOBAL_TARGETS raft compiled distributed + DOCUMENTATION doc_string + NAMESPACE raft:: + FINAL_CODE_BLOCK code_string ) # ################################################################################################## diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 6267be518e..a14018a15d 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -80,6 +80,7 @@ function(ConfigureAnnBench) $<$:NCCL::NCCL> ${ConfigureAnnBench_LINKS} Threads::Threads + ${RAFT_CTK_MATH_DEPENDENCIES} $ $ ) diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp index 03975b1b7d..87a195757c 100644 --- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp +++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -965,7 +965,8 @@ inline cublasStatus_t cublasdot(cublasHandle_t handle, cudaStream_t stream) { RAFT_CUBLAS_TRY(cublasSetStream(handle, stream)); - return cublasSdot(handle, n, x, incx, y, incy, result); + return cublasDotEx( + handle, n, x, CUDA_R_32F, incx, y, CUDA_R_32F, incy, result, CUDA_R_32F, CUDA_R_32F); } template <> @@ -979,7 +980,8 @@ inline cublasStatus_t cublasdot(cublasHandle_t handle, cudaStream_t stream) { RAFT_CUBLAS_TRY(cublasSetStream(handle, stream)); - return cublasDdot(handle, n, x, incx, y, incy, result); + return cublasDotEx( + handle, n, x, CUDA_R_64F, incx, y, CUDA_R_64F, incy, result, CUDA_R_64F, CUDA_R_64F); } /** @} */ diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp index 160664bae8..d81c64b257 100644 --- a/cpp/include/raft/spectral/detail/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp @@ -32,24 +32,6 @@ #include #include -#ifdef COLLECT_TIME_STATISTICS -#include -#include -#include -#include -#include -#endif - -#ifdef COLLECT_TIME_STATISTICS -static double timer(void) -{ - struct timeval tv; - cudaDeviceSynchronize(); - gettimeofday(&tv, NULL); - return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; -} -#endif - namespace raft { namespace spectral { namespace detail { diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 9109d84fe4..22e8a9d73c 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -31,12 +31,13 @@ function(ConfigureTest) target_link_libraries( ${TEST_NAME} - PRIVATE raft::raft + PRIVATE raft raft_internal $<$:raft::compiled> GTest::gtest GTest::gtest_main Threads::Threads + ${RAFT_CTK_MATH_DEPENDENCIES} $ $ ) diff --git a/docs/source/build.md b/docs/source/build.md index d7550eb631..262c5703bc 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -151,18 +151,22 @@ make -j install RAFT's cmake has the following configurable flags available:. -| Flag | Possible Values | Default Value | Behavior | -|---------------------------|----------------------| --- | --- | -| BUILD_TESTS | ON, OFF | ON | Compile Googletests | -| BUILD_PRIMS_BENCH | ON, OFF | OFF | Compile benchmarks | +| Flag | Possible Values | Default Value | Behavior | +|---------------------------------|----------------------| --- |------------------------------------------------------------------------------| +| BUILD_TESTS | ON, OFF | ON | Compile Googletests | +| BUILD_PRIMS_BENCH | ON, OFF | OFF | Compile benchmarks | | BUILD_ANN_BENCH | ON, OFF | OFF | Compile end-to-end ANN benchmarks | -| raft_FIND_COMPONENTS | compiled distributed | | Configures the optional components as a space-separated list | | RAFT_COMPILE_LIBRARY | ON, OFF | ON if either BUILD_TESTS or BUILD_PRIMS_BENCH is ON; otherwise OFF | Compiles all `libraft` shared libraries (these are required for Googletests) | -| DETECT_CONDA_ENV | ON, OFF | ON | Enable detection of conda environment for dependencies | -| RAFT_NVTX | ON, OFF | OFF | Enable NVTX Markers | -| CUDA_ENABLE_KERNELINFO | ON, OFF | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer` | -| CUDA_ENABLE_LINEINFO | ON, OFF | OFF | Enable the -lineinfo option for nvcc | -| CUDA_STATIC_RUNTIME | ON, OFF | OFF | Statically link the CUDA runtime | +| raft_FIND_COMPONENTS | compiled distributed | | Configures the optional components as a space-separated list | +| RAFT_ENABLE_CUBLAS_DEPENDENCY | ON, OFF | ON | Link against cublas library in `raft::raft` | +| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against cusolver library in `raft::raft` | +| RAFT_ENABLE_CUSPARSE_DEPENDENCY | ON, OFF | ON | Link against cusparse library in `raft::raft` | +| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against curand library in `raft::raft` | +| DETECT_CONDA_ENV | ON, OFF | ON | Enable detection of conda environment for dependencies | +| RAFT_NVTX | ON, OFF | OFF | Enable NVTX Markers | +| CUDA_ENABLE_KERNELINFO | ON, OFF | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer` | +| CUDA_ENABLE_LINEINFO | ON, OFF | OFF | Enable the -lineinfo option for nvcc | +| CUDA_STATIC_RUNTIME | ON, OFF | OFF | Statically link the CUDA runtime | Currently, shared libraries are provided for the `libraft-nn` and `libraft-distance` components. From bd69713f84e34839881de00bfc61a7f32504dc05 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Mon, 17 Apr 2023 19:22:12 +0200 Subject: [PATCH 12/78] Use ARC V2 self-hosted runners for GPU jobs (#1410) This PR is updating the runner labels to use ARC V2 self-hosted runners for GPU jobs. This is needed to resolve the auto-scalling issues. Authors: - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/raft/pull/1410 --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bec89ab888..0f5f84c158 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -60,7 +60,7 @@ jobs: uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: branch - node_type: "gpu-latest-1" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 8175b4fbc7..fc8c8d516e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -60,7 +60,7 @@ jobs: uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.06 with: build_type: pull-request - node_type: "gpu-latest-1" + node_type: "gpu-v100-latest-1" arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/build_docs.sh" From c7a72bea63b4c57c40cc545ce95fb9c0252d1995 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 17 Apr 2023 11:34:47 -0700 Subject: [PATCH 13/78] Fix is_min_close (#1419) Correlation and Cosine distance both return (1 - similarity) in the pairwise distances apis, meaning that is_min_close is returning the wrong sort order for them. Fix. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1419 --- cpp/include/raft/distance/distance_types.hpp | 2 -- cpp/include/raft/sparse/neighbors/detail/knn.cuh | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/distance/distance_types.hpp b/cpp/include/raft/distance/distance_types.hpp index 4060147f1d..d17ef358ee 100644 --- a/cpp/include/raft/distance/distance_types.hpp +++ b/cpp/include/raft/distance/distance_types.hpp @@ -74,8 +74,6 @@ inline bool is_min_close(DistanceType metric) bool select_min; switch (metric) { case DistanceType::InnerProduct: - case DistanceType::CosineExpanded: - case DistanceType::CorrelationExpanded: // Similarity metrics have the opposite meaning, i.e. nearest neighbors are those with larger // similarity (See the same logic at cpp/include/raft/sparse/spatial/detail/knn.cuh:362 // {perform_k_selection}) diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh index 7bedec9830..f9f07c13ca 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh @@ -355,8 +355,7 @@ class sparse_knn_t { // want to adjust k. value_idx n_neighbors = std::min(static_cast(k), batch_cols); - bool ascending = true; - if (metric == raft::distance::DistanceType::InnerProduct) ascending = false; + bool ascending = raft::distance::is_min_close(metric); // kernel to slice first (min) k cols and copy into batched merge buffer raft::spatial::knn::select_k(batch_dists, @@ -425,4 +424,4 @@ class sparse_knn_t { raft::device_resources const& handle; }; -}; // namespace raft::sparse::neighbors::detail \ No newline at end of file +}; // namespace raft::sparse::neighbors::detail From ba207a05d1b4ce35338ca5a7c395d8773d98ca89 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Mon, 17 Apr 2023 22:06:33 +0200 Subject: [PATCH 14/78] IVF-PQ: manipulating individual lists (#1298) Add public functions for reading and writing into individual ivf-pq lists (clusters), in the input space (reconstructed data) and in flat PQ codes. Partially solves (IVF-PQ) https://github.com/rapidsai/raft/issues/1205 Authors: - Artem M. Chirkin (https://github.com/achirkin) - Corey J. Nolet (https://github.com/cjnolet) - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1298 --- .../raft/neighbors/detail/ivf_pq_build.cuh | 731 ++++++++++++++---- .../neighbors/detail/ivf_pq_codepacking.cuh | 214 +++++ cpp/include/raft/neighbors/ivf_pq.cuh | 2 +- cpp/include/raft/neighbors/ivf_pq_helpers.cuh | 409 ++++++++++ cpp/test/neighbors/ann_ivf_pq.cuh | 199 ++++- 5 files changed, 1390 insertions(+), 165 deletions(-) create mode 100644 cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh create mode 100644 cpp/include/raft/neighbors/ivf_pq_helpers.cuh diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 1a563d213e..36ceccc36f 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -18,6 +18,7 @@ #include +#include #include #include @@ -60,63 +61,6 @@ namespace raft::neighbors::ivf_pq::detail { using namespace raft::spatial::knn::detail; // NOLINT -/** A chunk of PQ-encoded vector managed by one CUDA thread. */ -using pq_vec_t = TxN_t::io_t; - -namespace { - -/** - * This type mimics the `uint8_t&` for the indexing operator of `bitfield_view_t`. - * - * @tparam Bits number of bits comprising the value. - */ -template -struct bitfield_ref_t { - static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte"); - constexpr static uint8_t kMask = static_cast((1u << Bits) - 1u); - uint8_t* ptr; - uint32_t offset; - - constexpr operator uint8_t() // NOLINT - { - auto pair = static_cast(ptr[0]); - if (offset + Bits > 8) { pair |= static_cast(ptr[1]) << 8; } - return static_cast((pair >> offset) & kMask); - } - - constexpr auto operator=(uint8_t code) -> bitfield_ref_t& - { - if (offset + Bits > 8) { - auto pair = static_cast(ptr[0]); - pair |= static_cast(ptr[1]) << 8; - pair &= ~(static_cast(kMask) << offset); - pair |= static_cast(code) << offset; - ptr[0] = static_cast(Pow2<256>::mod(pair)); - ptr[1] = static_cast(Pow2<256>::div(pair)); - } else { - ptr[0] = (ptr[0] & ~(kMask << offset)) | (code << offset); - } - return *this; - } -}; - -/** - * View a byte array as an array of unsigned integers of custom small bit size. - * - * @tparam Bits number of bits comprising a single element of the array. - */ -template -struct bitfield_view_t { - static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte"); - uint8_t* raw; - - constexpr auto operator[](uint32_t i) -> bitfield_ref_t - { - uint32_t bit_offset = i * Bits; - return bitfield_ref_t{raw + Pow2<8>::div(bit_offset), Pow2<8>::mod(bit_offset)}; - } -}; - template __launch_bounds__(BlockDim) __global__ void copy_warped_kernel( T* out, uint32_t ld_out, const S* in, uint32_t ld_in, uint32_t n_cols, size_t n_rows) @@ -162,8 +106,6 @@ void copy_warped(T* out, <<>>(out, ld_out, in, ld_in, n_cols, n_rows); } -} // namespace - /** * @brief Fill-in a random orthogonal transformation matrix. * @@ -276,7 +218,7 @@ void flat_compute_residuals( device_matrix_view rotation_matrix, // [rot_dim, dim] device_matrix_view centers, // [n_lists, dim_ext] const T* dataset, // [n_rows, dim] - const uint32_t* labels, // [n_rows] + std::variant labels, // [n_rows] rmm::mr::device_memory_resource* device_memory) { auto stream = handle.get_stream(); @@ -287,7 +229,9 @@ void flat_compute_residuals( linalg::map_offset(handle, tmp_view, [centers, dataset, labels, dim] __device__(size_t i) { auto row_ix = i / dim; auto el_ix = i % dim; - auto label = labels[row_ix]; + auto label = std::holds_alternative(labels) + ? std::get(labels) + : std::get(labels)[row_ix]; return utils::mapping{}(dataset[i]) - centers(label, el_ix); }); @@ -558,11 +502,363 @@ void train_per_cluster(raft::device_resources const& handle, } /** - * Compute the code: find the closest cluster in each pq_dim-subspace. + * A helper function: given the dataset in the rotated space + * [n_rows, rot_dim] = [n_rows, pq_dim * pq_len], + * reinterpret the last dimension as two: [n_rows, pq_dim, pq_len] + * + * @tparam T + * @tparam IdxT + * + * @param vectors input data [n_rows, rot_dim] + * @param pq_centers codebook (used to infer the structure - pq_len) + * @return reinterpreted vectors [n_rows, pq_dim, pq_len] + */ +template +static __device__ auto reinterpret_vectors( + device_matrix_view vectors, + device_mdspan, row_major> pq_centers) + -> device_mdspan, row_major> +{ + const uint32_t pq_len = pq_centers.extent(1); + const uint32_t pq_dim = vectors.extent(1) / pq_len; + using layout_t = typename decltype(vectors)::layout_type; + using accessor_t = typename decltype(vectors)::accessor_type; + return mdspan, layout_t, accessor_t>( + vectors.data_handle(), extent_3d{vectors.extent(0), pq_dim, pq_len}); +} + +/** + * A consumer for the `run_on_list` and `run_on_vector` that just flattens PQ codes + * one-per-byte. That is, independent of the code width (pq_bits), one code uses + * the whole byte, hence one vectors uses pq_dim bytes. + */ +struct unpack_codes { + device_matrix_view out_codes; + + /** + * Create a callable to be passed to `run_on_list`. + * + * @param[out] out_codes the destination for the read codes. + */ + __device__ inline unpack_codes(device_matrix_view out_codes) + : out_codes{out_codes} + { + } + + /** Write j-th component (code) of the i-th vector into the output array. */ + __device__ inline void operator()(uint8_t code, uint32_t i, uint32_t j) + { + out_codes(i, j) = code; + } +}; + +template +__launch_bounds__(BlockSize) __global__ void unpack_list_data_kernel( + device_matrix_view out_codes, + device_mdspan::list_extents, row_major> in_list_data, + std::variant offset_or_indices) +{ + const uint32_t pq_dim = out_codes.extent(1); + auto unpack_action = unpack_codes{out_codes}; + run_on_list(in_list_data, offset_or_indices, out_codes.extent(0), pq_dim, unpack_action); +} + +/** + * Unpack flat PQ codes from an existing list by the given offset. + * + * @param[out] codes flat PQ codes, one code per byte [n_rows, pq_dim] + * @param[in] list_data the packed ivf::list data. + * @param[in] offset_or_indices how many records in the list to skip or the exact indices. + * @param[in] pq_bits codebook size (1 << pq_bits) + * @param[in] stream + */ +inline void unpack_list_data( + device_matrix_view codes, + device_mdspan::list_extents, row_major> list_data, + std::variant offset_or_indices, + uint32_t pq_bits, + rmm::cuda_stream_view stream) +{ + auto n_rows = codes.extent(0); + if (n_rows == 0) { return; } + + constexpr uint32_t kBlockSize = 256; + dim3 blocks(div_rounding_up_safe(n_rows, kBlockSize), 1, 1); + dim3 threads(kBlockSize, 1, 1); + auto kernel = [pq_bits]() { + switch (pq_bits) { + case 4: return unpack_list_data_kernel; + case 5: return unpack_list_data_kernel; + case 6: return unpack_list_data_kernel; + case 7: return unpack_list_data_kernel; + case 8: return unpack_list_data_kernel; + default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits); + } + }(); + kernel<<>>(codes, list_data, offset_or_indices); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +/** Unpack the list data; see the public interface for the api and usage. */ +template +void unpack_list_data(raft::device_resources const& res, + const index& index, + device_matrix_view out_codes, + uint32_t label, + std::variant offset_or_indices) +{ + unpack_list_data(out_codes, + index.lists()[label]->data.view(), + offset_or_indices, + index.pq_bits(), + res.get_stream()); +} + +/** A consumer for the `run_on_list` and `run_on_vector` that approximates the original input data. + */ +struct reconstruct_vectors { + codebook_gen codebook_kind; + uint32_t cluster_ix; + uint32_t pq_len; + device_mdspan, row_major> pq_centers; + device_mdspan, row_major> centers_rot; + device_mdspan, row_major> out_vectors; + + /** + * Create a callable to be passed to `run_on_list`. + * + * @param[out] out_vectors the destination for the decoded vectors. + * @param[in] pq_centers the codebook + * @param[in] centers_rot + * @param[in] codebook_kind + * @param[in] cluster_ix label/id of the cluster. + */ + __device__ inline reconstruct_vectors( + device_matrix_view out_vectors, + device_mdspan, row_major> pq_centers, + device_matrix_view centers_rot, + codebook_gen codebook_kind, + uint32_t cluster_ix) + : codebook_kind{codebook_kind}, + cluster_ix{cluster_ix}, + pq_len{pq_centers.extent(1)}, + pq_centers{pq_centers}, + centers_rot{reinterpret_vectors(centers_rot, pq_centers)}, + out_vectors{reinterpret_vectors(out_vectors, pq_centers)} + { + } + + /** + * Decode j-th component of the i-th vector by its code and write it into a chunk of the output + * vectors (pq_len elements). + */ + __device__ inline void operator()(uint8_t code, uint32_t i, uint32_t j) + { + uint32_t partition_ix; + switch (codebook_kind) { + case codebook_gen::PER_CLUSTER: { + partition_ix = cluster_ix; + } break; + case codebook_gen::PER_SUBSPACE: { + partition_ix = j; + } break; + default: __builtin_unreachable(); + } + for (uint32_t k = 0; k < pq_len; k++) { + out_vectors(i, j, k) = pq_centers(partition_ix, k, code) + centers_rot(cluster_ix, j, k); + } + } +}; + +template +__launch_bounds__(BlockSize) __global__ void reconstruct_list_data_kernel( + device_matrix_view out_vectors, + device_mdspan::list_extents, row_major> in_list_data, + device_mdspan, row_major> pq_centers, + device_matrix_view centers_rot, + codebook_gen codebook_kind, + uint32_t cluster_ix, + std::variant offset_or_indices) +{ + const uint32_t pq_dim = out_vectors.extent(1) / pq_centers.extent(1); + auto reconstruct_action = + reconstruct_vectors{out_vectors, pq_centers, centers_rot, codebook_kind, cluster_ix}; + run_on_list( + in_list_data, offset_or_indices, out_vectors.extent(0), pq_dim, reconstruct_action); +} + +/** Decode the list data; see the public interface for the api and usage. */ +template +void reconstruct_list_data(raft::device_resources const& res, + const index& index, + device_matrix_view out_vectors, + uint32_t label, + std::variant offset_or_indices) +{ + auto n_rows = out_vectors.extent(0); + if (n_rows == 0) { return; } + auto& list = index.lists()[label]; + if (std::holds_alternative(offset_or_indices)) { + auto n_skip = std::get(offset_or_indices); + // sic! I'm using the upper bound `list.size` instead of exact `list_sizes(label)` + // to avoid an extra device-host data copy and the stream sync. + RAFT_EXPECTS(n_skip + n_rows <= list->size.load(), + "offset + output size must be not bigger than the cluster size."); + } + + auto tmp = make_device_mdarray( + res, res.get_workspace_resource(), make_extents(n_rows, index.rot_dim())); + + constexpr uint32_t kBlockSize = 256; + dim3 blocks(div_rounding_up_safe(n_rows, kBlockSize), 1, 1); + dim3 threads(kBlockSize, 1, 1); + auto kernel = [](uint32_t pq_bits) { + switch (pq_bits) { + case 4: return reconstruct_list_data_kernel; + case 5: return reconstruct_list_data_kernel; + case 6: return reconstruct_list_data_kernel; + case 7: return reconstruct_list_data_kernel; + case 8: return reconstruct_list_data_kernel; + default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits); + } + }(index.pq_bits()); + kernel<<>>(tmp.view(), + list->data.view(), + index.pq_centers(), + index.centers_rot(), + index.codebook_kind(), + label, + offset_or_indices); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + float* out_float_ptr = nullptr; + rmm::device_uvector out_float_buf(0, res.get_stream(), res.get_workspace_resource()); + if constexpr (std::is_same_v) { + out_float_ptr = out_vectors.data_handle(); + } else { + out_float_buf.resize(size_t{n_rows} * size_t{index.dim()}, res.get_stream()); + out_float_ptr = out_float_buf.data(); + } + // Rotate the results back to the original space + float alpha = 1.0; + float beta = 0.0; + linalg::gemm(res, + false, + false, + index.dim(), + n_rows, + index.rot_dim(), + &alpha, + index.rotation_matrix().data_handle(), + index.dim(), + tmp.data_handle(), + index.rot_dim(), + &beta, + out_float_ptr, + index.dim(), + res.get_stream()); + // Transform the data to the original type, if necessary + if constexpr (!std::is_same_v) { + linalg::map(res, + out_vectors, + utils::mapping{}, + make_device_matrix_view(out_float_ptr, n_rows, index.dim())); + } +} + +/** + * A producer for the `write_list` and `write_vector` reads the codes byte-by-byte. That is, + * independent of the code width (pq_bits), one code uses the whole byte, hence one vectors uses + * pq_dim bytes. + */ +struct pass_codes { + device_matrix_view codes; + + /** + * Create a callable to be passed to `run_on_list`. + * + * @param[in] codes the source codes. + */ + __device__ inline pass_codes(device_matrix_view codes) + : codes{codes} + { + } + + /** Read j-th component (code) of the i-th vector from the source. */ + __device__ inline auto operator()(uint32_t i, uint32_t j) const -> uint8_t { return codes(i, j); } +}; + +template +__launch_bounds__(BlockSize) __global__ void pack_list_data_kernel( + device_mdspan::list_extents, row_major> list_data, + device_matrix_view codes, + std::variant offset_or_indices) +{ + write_list( + list_data, offset_or_indices, codes.extent(0), codes.extent(1), pass_codes{codes}); +} + +/** + * Write flat PQ codes into an existing list by the given offset. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_rows). + * + * @param[out] list_data the packed ivf::list data. + * @param[in] codes flat PQ codes, one code per byte [n_rows, pq_dim] + * @param[in] offset_or_indices how many records in the list to skip or the exact indices. + * @param[in] pq_bits codebook size (1 << pq_bits) + * @param[in] stream + */ +inline void pack_list_data( + device_mdspan::list_extents, row_major> list_data, + device_matrix_view codes, + std::variant offset_or_indices, + uint32_t pq_bits, + rmm::cuda_stream_view stream) +{ + auto n_rows = codes.extent(0); + if (n_rows == 0) { return; } + + constexpr uint32_t kBlockSize = 256; + dim3 blocks(div_rounding_up_safe(n_rows, kBlockSize), 1, 1); + dim3 threads(kBlockSize, 1, 1); + auto kernel = [pq_bits]() { + switch (pq_bits) { + case 4: return pack_list_data_kernel; + case 5: return pack_list_data_kernel; + case 6: return pack_list_data_kernel; + case 7: return pack_list_data_kernel; + case 8: return pack_list_data_kernel; + default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits); + } + }(); + kernel<<>>(list_data, codes, offset_or_indices); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +template +void pack_list_data(raft::device_resources const& res, + index* index, + device_matrix_view new_codes, + uint32_t label, + std::variant offset_or_indices) +{ + pack_list_data(index->lists()[label]->data.view(), + new_codes, + offset_or_indices, + index->pq_bits(), + res.get_stream()); +} + +/** + * + * A producer for the `write_list` and `write_vector` that encodes level-1 input vector residuals + * into lvl-2 PQ codes. + * Computing a PQ code means finding the closest cluster in a pq_dim-subspace. * * @tparam SubWarpSize * how many threads work on a single vector; - * bouded by either WarpSize or pq_book_size. + * bounded by either WarpSize or pq_book_size. * * @param pq_centers * - codebook_gen::PER_SUBSPACE: [pq_dim , pq_len, pq_book_size] @@ -574,56 +870,75 @@ void train_per_cluster(raft::device_resources const& handle, * @param j index along pq_dim "dimension" * @param cluster_ix is used for PER_CLUSTER codebooks. */ -template -__device__ auto compute_pq_code( - device_mdspan, row_major> pq_centers, - device_mdspan, row_major> new_vector, - codebook_gen codebook_kind, - uint32_t j, - uint32_t cluster_ix) -> uint8_t -{ - using subwarp_align = Pow2; - uint32_t lane_id = subwarp_align::mod(laneId()); - uint32_t partition_ix; - switch (codebook_kind) { - case codebook_gen::PER_CLUSTER: { - partition_ix = cluster_ix; - } break; - case codebook_gen::PER_SUBSPACE: { - partition_ix = j; - } break; - default: __builtin_unreachable(); +/** + */ +template +struct encode_vectors { + codebook_gen codebook_kind; + uint32_t cluster_ix; + device_mdspan, row_major> pq_centers; + device_mdspan, row_major> in_vectors; + + __device__ inline encode_vectors( + device_mdspan, row_major> pq_centers, + device_matrix_view in_vectors, + codebook_gen codebook_kind, + uint32_t cluster_ix) + : codebook_kind{codebook_kind}, + cluster_ix{cluster_ix}, + pq_centers{pq_centers}, + in_vectors{reinterpret_vectors(in_vectors, pq_centers)} + { } - const uint32_t pq_book_size = pq_centers.extent(2); - const uint32_t pq_len = pq_centers.extent(1); - float min_dist = std::numeric_limits::infinity(); - uint8_t code = 0; - // calculate the distance for each PQ cluster, find the minimum for each thread - for (uint32_t i = lane_id; i < pq_book_size; i += subwarp_align::Value) { - // NB: the L2 quantifiers on residuals are always trained on L2 metric. - float d = 0.0f; - for (uint32_t k = 0; k < pq_len; k++) { - auto t = new_vector(j, k) - pq_centers(partition_ix, k, i); - d += t * t; + /** + * Decode j-th component of the i-th vector by its code and write it into a chunk of the output + * vectors (pq_len elements). + */ + __device__ inline auto operator()(IdxT i, uint32_t j) -> uint8_t + { + uint32_t lane_id = Pow2::mod(laneId()); + uint32_t partition_ix; + switch (codebook_kind) { + case codebook_gen::PER_CLUSTER: { + partition_ix = cluster_ix; + } break; + case codebook_gen::PER_SUBSPACE: { + partition_ix = j; + } break; + default: __builtin_unreachable(); } - if (d < min_dist) { - min_dist = d; - code = uint8_t(i); + + const uint32_t pq_book_size = pq_centers.extent(2); + const uint32_t pq_len = pq_centers.extent(1); + float min_dist = std::numeric_limits::infinity(); + uint8_t code = 0; + // calculate the distance for each PQ cluster, find the minimum for each thread + for (uint32_t l = lane_id; l < pq_book_size; l += SubWarpSize) { + // NB: the L2 quantifiers on residuals are always trained on L2 metric. + float d = 0.0f; + for (uint32_t k = 0; k < pq_len; k++) { + auto t = in_vectors(i, j, k) - pq_centers(partition_ix, k, l); + d += t * t; + } + if (d < min_dist) { + min_dist = d; + code = uint8_t(l); + } } - } - // reduce among threads + // reduce among threads #pragma unroll - for (uint32_t stride = SubWarpSize >> 1; stride > 0; stride >>= 1) { - const auto other_dist = shfl_xor(min_dist, stride, SubWarpSize); - const auto other_code = shfl_xor(code, stride, SubWarpSize); - if (other_dist < min_dist) { - min_dist = other_dist; - code = other_code; + for (uint32_t stride = SubWarpSize >> 1; stride > 0; stride >>= 1) { + const auto other_dist = shfl_xor(min_dist, stride, SubWarpSize); + const auto other_code = shfl_xor(code, stride, SubWarpSize); + if (other_dist < min_dist) { + min_dist = other_dist; + code = other_code; + } } + return code; } - return code; -} +}; template __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel( @@ -639,7 +954,7 @@ __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel( constexpr uint32_t kSubWarpSize = std::min(WarpSize, 1u << PqBits); using subwarp_align = Pow2; const uint32_t lane_id = subwarp_align::mod(threadIdx.x); - const IdxT row_ix = subwarp_align::div(IdxT{threadIdx.x} + IdxT{blockDim.x} * IdxT{blockIdx.x}); + const IdxT row_ix = subwarp_align::div(IdxT{threadIdx.x} + IdxT{BlockSize} * IdxT{blockIdx.x}); if (row_ix >= new_vectors.extent(0)) { return; } const uint32_t cluster_ix = new_labels[row_ix]; @@ -647,7 +962,7 @@ __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel( if (lane_id == 0) { out_ix = atomicAdd(&list_sizes(cluster_ix), 1); } out_ix = shfl(out_ix, 0, kSubWarpSize); - // write the label + // write the label (one record per subwarp) auto pq_indices = inds_ptrs(cluster_ix); if (lane_id == 0) { if (std::holds_alternative(src_offset_or_indices)) { @@ -657,40 +972,81 @@ __launch_bounds__(BlockSize) __global__ void process_and_fill_codes_kernel( } } - // write the codes - using group_align = Pow2; - const uint32_t group_ix = group_align::div(out_ix); - const uint32_t ingroup_ix = group_align::mod(out_ix); - const uint32_t pq_len = pq_centers.extent(1); - const uint32_t pq_dim = new_vectors.extent(1) / pq_len; - + // write the codes (one record per subwarp): + const uint32_t pq_dim = new_vectors.extent(1) / pq_centers.extent(1); auto pq_extents = list_spec{PqBits, pq_dim, true}.make_list_extents(out_ix + 1); - auto pq_extents_vectorized = - make_extents(pq_extents.extent(0), pq_extents.extent(1), pq_extents.extent(2)); - auto pq_dataset = make_mdspan( - reinterpret_cast(data_ptrs[cluster_ix]), pq_extents_vectorized); - - __shared__ pq_vec_t codes[subwarp_align::div(BlockSize)]; - pq_vec_t& code = codes[subwarp_align::div(threadIdx.x)]; - bitfield_view_t out{reinterpret_cast(&code)}; - constexpr uint32_t kChunkSize = (sizeof(pq_vec_t) * 8u) / PqBits; - for (uint32_t j = 0, i = 0; j < pq_dim; i++) { - // clear the chunk for writing - if (lane_id == 0) { code = pq_vec_t{}; } - // fill-in the values, one/pq_dim at a time -#pragma unroll - for (uint32_t k = 0; k < kChunkSize && j < pq_dim; k++, j++) { - // find the label - using layout_t = typename decltype(new_vectors)::layout_type; - using accessor_t = typename decltype(new_vectors)::accessor_type; - auto one_vector = mdspan, layout_t, accessor_t>( - &new_vectors(row_ix, 0), extent_2d{pq_dim, pq_len}); - auto l = compute_pq_code(pq_centers, one_vector, codebook_kind, j, cluster_ix); - if (lane_id == 0) { out[k] = l; } + auto pq_dataset = + make_mdspan(data_ptrs[cluster_ix], pq_extents); + write_vector( + pq_dataset, + out_ix, + row_ix, + pq_dim, + encode_vectors{pq_centers, new_vectors, codebook_kind, cluster_ix}); +} + +template +__launch_bounds__(BlockSize) __global__ void encode_list_data_kernel( + device_mdspan::list_extents, row_major> list_data, + device_matrix_view new_vectors, + device_mdspan, row_major> pq_centers, + codebook_gen codebook_kind, + uint32_t cluster_ix, + std::variant offset_or_indices) +{ + constexpr uint32_t kSubWarpSize = std::min(WarpSize, 1u << PqBits); + const uint32_t pq_dim = new_vectors.extent(1) / pq_centers.extent(1); + auto encode_action = + encode_vectors{pq_centers, new_vectors, codebook_kind, cluster_ix}; + write_list( + list_data, offset_or_indices, new_vectors.extent(0), pq_dim, encode_action); +} + +template +void encode_list_data(raft::device_resources const& res, + index* index, + device_matrix_view new_vectors, + uint32_t label, + std::variant offset_or_indices) +{ + auto n_rows = new_vectors.extent(0); + if (n_rows == 0) { return; } + + auto mr = res.get_workspace_resource(); + + auto new_vectors_residual = + make_device_mdarray(res, mr, make_extents(n_rows, index->rot_dim())); + + flat_compute_residuals(res, + new_vectors_residual.data_handle(), + n_rows, + index->rotation_matrix(), + index->centers(), + new_vectors.data_handle(), + label, + mr); + + constexpr uint32_t kBlockSize = 256; + const uint32_t threads_per_vec = std::min(WarpSize, index->pq_book_size()); + dim3 blocks(div_rounding_up_safe(n_rows, kBlockSize / threads_per_vec), 1, 1); + dim3 threads(kBlockSize, 1, 1); + auto kernel = [](uint32_t pq_bits) { + switch (pq_bits) { + case 4: return encode_list_data_kernel; + case 5: return encode_list_data_kernel; + case 6: return encode_list_data_kernel; + case 7: return encode_list_data_kernel; + case 8: return encode_list_data_kernel; + default: RAFT_FAIL("Invalid pq_bits (%u), the value must be within [4, 8]", pq_bits); } - // write the chunk into the dataset - if (lane_id == 0) { pq_dataset(group_ix, i, ingroup_ix) = code; } - } + }(index->pq_bits()); + kernel<<>>(index->lists()[label]->data.view(), + new_vectors_residual.view(), + index->pq_centers(), + index->codebook_kind(), + label, + offset_or_indices); + RAFT_CUDA_TRY(cudaPeekAtLastError()); } /** @@ -732,14 +1088,14 @@ void process_and_fill_codes(raft::device_resources const& handle, auto new_vectors_residual = make_device_mdarray(handle, mr, make_extents(n_rows, index.rot_dim())); - flat_compute_residuals(handle, - new_vectors_residual.data_handle(), - n_rows, - index.rotation_matrix(), - index.centers(), - new_vectors, - new_labels, - mr); + flat_compute_residuals(handle, + new_vectors_residual.data_handle(), + n_rows, + index.rotation_matrix(), + index.centers(), + new_vectors, + new_labels, + mr); constexpr uint32_t kBlockSize = 256; const uint32_t threads_per_vec = std::min(WarpSize, index.pq_book_size()); @@ -819,6 +1175,85 @@ void recompute_internal_state(const raft::device_resources& res, index& in } } +/** + * Helper function: allocate enough space in the list, compute the offset, at which to start + * writing, and fill-in indices. + * + * @return offset for writing the data + */ +template +auto extend_list_prepare(raft::device_resources const& res, + index* index, + device_vector_view new_indices, + uint32_t label) -> uint32_t +{ + uint32_t n_rows = new_indices.extent(0); + uint32_t offset; + // Allocate the lists to fit the new data + copy(&offset, index->list_sizes().data_handle() + label, 1, res.get_stream()); + res.sync_stream(); + uint32_t new_size = offset + n_rows; + copy(index->list_sizes().data_handle() + label, &new_size, 1, res.get_stream()); + auto spec = list_spec{ + index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()}; + auto& list = index->lists()[label]; + ivf::resize_list(res, list, spec, new_size, offset); + copy(list->indices.data_handle() + offset, new_indices.data_handle(), n_rows, res.get_stream()); + return offset; +} + +/** + * Extend one list of the index in-place, by the list label, skipping the classification and + * encoding steps. + * See the public interface for the api and usage. + */ +template +void extend_list_with_codes(raft::device_resources const& res, + index* index, + device_matrix_view new_codes, + device_vector_view new_indices, + uint32_t label) +{ + // Allocate memory and write indices + auto offset = extend_list_prepare(res, index, new_indices, label); + // Pack the data + pack_list_data(res, index, new_codes, label, offset); + // Update the pointers and the sizes + recompute_internal_state(res, *index); +} + +/** + * Extend one list of the index in-place, by the list label, skipping the classification step. + * See the public interface for the api and usage. + */ +template +void extend_list(raft::device_resources const& res, + index* index, + device_matrix_view new_vectors, + device_vector_view new_indices, + uint32_t label) +{ + // Allocate memory and write indices + auto offset = extend_list_prepare(res, index, new_indices, label); + // Encode the data + encode_list_data(res, index, new_vectors, label, offset); + // Update the pointers and the sizes + recompute_internal_state(res, *index); +} + +/** + * Remove all data from a single list. + * See the public interface for the api and usage. + */ +template +void erase_list(raft::device_resources const& res, index* index, uint32_t label) +{ + uint32_t zero = 0; + copy(index->list_sizes().data_handle() + label, &zero, 1, res.get_stream()); + index->lists()[label].reset(); + recompute_internal_state(res, *index); +} + /** Copy the state of an index into a new index, but share the list data among the two. */ template auto clone(const raft::device_resources& res, const index& source) -> index diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh new file mode 100644 index 0000000000..52969dd176 --- /dev/null +++ b/cpp/include/raft/neighbors/detail/ivf_pq_codepacking.cuh @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace raft::neighbors::ivf_pq::detail { + +/** A chunk of PQ-encoded vector managed by one CUDA thread. */ +using pq_vec_t = TxN_t::io_t; + +/** + * This type mimics the `uint8_t&` for the indexing operator of `bitfield_view_t`. + * + * @tparam Bits number of bits comprising the value. + */ +template +struct bitfield_ref_t { + static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte"); + constexpr static uint8_t kMask = static_cast((1u << Bits) - 1u); + uint8_t* ptr; + uint32_t offset; + + constexpr operator uint8_t() // NOLINT + { + auto pair = static_cast(ptr[0]); + if (offset + Bits > 8) { pair |= static_cast(ptr[1]) << 8; } + return static_cast((pair >> offset) & kMask); + } + + constexpr auto operator=(uint8_t code) -> bitfield_ref_t& + { + if (offset + Bits > 8) { + auto pair = static_cast(ptr[0]); + pair |= static_cast(ptr[1]) << 8; + pair &= ~(static_cast(kMask) << offset); + pair |= static_cast(code) << offset; + ptr[0] = static_cast(Pow2<256>::mod(pair)); + ptr[1] = static_cast(Pow2<256>::div(pair)); + } else { + ptr[0] = (ptr[0] & ~(kMask << offset)) | (code << offset); + } + return *this; + } +}; + +/** + * View a byte array as an array of unsigned integers of custom small bit size. + * + * @tparam Bits number of bits comprising a single element of the array. + */ +template +struct bitfield_view_t { + static_assert(Bits <= 8 && Bits > 0, "Bit code must fit one byte"); + uint8_t* raw; + + constexpr auto operator[](uint32_t i) -> bitfield_ref_t + { + uint32_t bit_offset = i * Bits; + return bitfield_ref_t{raw + Pow2<8>::div(bit_offset), Pow2<8>::mod(bit_offset)}; + } +}; + +/** + * Process a single vector in a list. + * + * @tparam PqBits + * @tparam Action tells how to process a single vector (e.g. reconstruct or just unpack) + * + * @param[in] in_list_data the encoded cluster data. + * @param[in] in_ix in-cluster index of the vector to be decoded (one-per-thread). + * @param[in] out_ix the output index passed to the action + * @param[in] pq_dim + * @param action a callable action to be invoked on each PQ code (component of the encoding) + * type: void (uint8_t code, uint32_t out_ix, uint32_t j), where j = [0..pq_dim). + */ +template +__device__ void run_on_vector( + device_mdspan::list_extents, row_major> in_list_data, + uint32_t in_ix, + uint32_t out_ix, + uint32_t pq_dim, + Action action) +{ + using group_align = Pow2; + const uint32_t group_ix = group_align::div(in_ix); + const uint32_t ingroup_ix = group_align::mod(in_ix); + + pq_vec_t code_chunk; + bitfield_view_t code_view{reinterpret_cast(&code_chunk)}; + constexpr uint32_t kChunkSize = (sizeof(pq_vec_t) * 8u) / PqBits; + for (uint32_t j = 0, i = 0; j < pq_dim; i++) { + // read the chunk + code_chunk = *reinterpret_cast(&in_list_data(group_ix, i, ingroup_ix, 0)); + // read the codes, one/pq_dim at a time +#pragma unroll + for (uint32_t k = 0; k < kChunkSize && j < pq_dim; k++, j++) { + // read a piece of the reconstructed vector + action(code_view[k], out_ix, j); + } + } +} + +/** + * Process a single vector in a list. + * + * @tparam PqBits + * @tparam SubWarpSize how many threads work on the same ix (only the first thread writes data). + * @tparam IdxT type of the index passed to the action + * @tparam Action tells how to process a single vector (e.g. encode or just pack) + * + * @param[in] out_list_data the encoded cluster data. + * @param[in] out_ix in-cluster index of the vector to be processed (one-per-SubWarpSize threads). + * @param[in] in_ix the input index passed to the action (one-per-SubWarpSize threads). + * @param[in] pq_dim + * @param action a callable action to be invoked on each PQ code (component of the encoding) + * type: (uint32_t in_ix, uint32_t j) -> uint8_t, where j = [0..pq_dim). + */ +template +__device__ void write_vector( + device_mdspan::list_extents, row_major> out_list_data, + uint32_t out_ix, + IdxT in_ix, + uint32_t pq_dim, + Action action) +{ + const uint32_t lane_id = Pow2::mod(threadIdx.x); + + using group_align = Pow2; + const uint32_t group_ix = group_align::div(out_ix); + const uint32_t ingroup_ix = group_align::mod(out_ix); + + pq_vec_t code_chunk; + bitfield_view_t code_view{reinterpret_cast(&code_chunk)}; + constexpr uint32_t kChunkSize = (sizeof(pq_vec_t) * 8u) / PqBits; + for (uint32_t j = 0, i = 0; j < pq_dim; i++) { + // clear the chunk + if (lane_id == 0) { code_chunk = pq_vec_t{}; } + // write the codes, one/pq_dim at a time +#pragma unroll + for (uint32_t k = 0; k < kChunkSize && j < pq_dim; k++, j++) { + // write a single code + uint8_t code = action(in_ix, j); + if (lane_id == 0) { code_view[k] = code; } + } + // write the chunk to the list + if (lane_id == 0) { + *reinterpret_cast(&out_list_data(group_ix, i, ingroup_ix, 0)) = code_chunk; + } + } +} + +/** Process the given indices or a block of a single list (cluster). */ +template +__device__ void run_on_list( + device_mdspan::list_extents, row_major> in_list_data, + std::variant offset_or_indices, + uint32_t len, + uint32_t pq_dim, + Action action) +{ + for (uint32_t ix = threadIdx.x + blockDim.x * blockIdx.x; ix < len; ix += blockDim.x) { + const uint32_t src_ix = std::holds_alternative(offset_or_indices) + ? std::get(offset_or_indices) + ix + : std::get(offset_or_indices)[ix]; + run_on_vector(in_list_data, src_ix, ix, pq_dim, action); + } +} + +/** Process the given indices or a block of a single list (cluster). */ +template +__device__ void write_list( + device_mdspan::list_extents, row_major> out_list_data, + std::variant offset_or_indices, + uint32_t len, + uint32_t pq_dim, + Action action) +{ + using subwarp_align = Pow2; + uint32_t stride = subwarp_align::div(blockDim.x); + uint32_t ix = subwarp_align::div(threadIdx.x + blockDim.x * blockIdx.x); + for (; ix < len; ix += stride) { + const uint32_t dst_ix = std::holds_alternative(offset_or_indices) + ? std::get(offset_or_indices) + ix + : std::get(offset_or_indices)[ix]; + write_vector(out_list_data, dst_ix, ix, pq_dim, action); + } +} + +} // namespace raft::neighbors::ivf_pq::detail diff --git a/cpp/include/raft/neighbors/ivf_pq.cuh b/cpp/include/raft/neighbors/ivf_pq.cuh index 934643e0af..dfc24e8214 100644 --- a/cpp/include/raft/neighbors/ivf_pq.cuh +++ b/cpp/include/raft/neighbors/ivf_pq.cuh @@ -234,7 +234,7 @@ auto build(raft::device_resources const& handle, * @brief Build a new index containing the data of the original plus new extra vectors. * * Implementation note: - * The new data is clustered according to existing kmeans clusters, then the cluster + * The new data is clustered according to existing kmeans clusters, the cluster * centers are unchanged. * * Usage example: diff --git a/cpp/include/raft/neighbors/ivf_pq_helpers.cuh b/cpp/include/raft/neighbors/ivf_pq_helpers.cuh new file mode 100644 index 0000000000..398bd545f1 --- /dev/null +++ b/cpp/include/raft/neighbors/ivf_pq_helpers.cuh @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +namespace raft::neighbors::ivf_pq::helpers { +/** + * @defgroup ivf_pq_helpers Helper functions for manipulationg IVF PQ Index + * @{ + */ + +namespace codepacker { +/** + * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. + * + * Bit compression is removed, which means output will have pq_dim dimensional vectors (one code per + * byte, instead of ceildiv(pq_dim * pq_bits, 8) bytes of pq codes). + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the output + * uint32_t n_take = 4; + * auto codes = raft::make_device_matrix(res, n_take, index.pq_dim()); + * uint32_t offset = 0; + * // unpack n_take elements from the list + * ivf_pq::helpers::codepacker::unpack(res, list_data, index.pq_bits(), offset, codes.view()); + * @endcode + * + * @tparam IdxT type of the indices in the source dataset + * + * @param[in] res raft resource + * @param[in] list_data block to read from + * @param[in] pq_bits bit length of encoded vector elements + * @param[in] offset + * How many records in the list to skip. + * @param[out] codes + * the destination buffer [n_take, index.pq_dim()]. + * The length `n_take` defines how many records to unpack, + * it must be smaller than the list size. + */ +inline void unpack( + raft::device_resources const& res, + device_mdspan::list_extents, row_major> list_data, + uint32_t pq_bits, + uint32_t offset, + device_matrix_view codes) +{ + ivf_pq::detail::unpack_list_data(codes, list_data, offset, pq_bits, res.get_stream()); +} + +/** + * Write flat PQ codes into an existing list by the given offset. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_vec). + * + * Usage example: + * @code{.cpp} + * auto list_data = index.lists()[label]->data.view(); + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix(res, n_vec, index.pq_dim()); + * ... prepare n_vecs to pack into the list in codes ... + * // write codes into the list starting from the 42nd position + * ivf_pq::helpers::codepacker::pack( + * res, make_const_mdspan(codes.view()), index.pq_bits(), 42, list_data); + * @endcode + * + * @param[in] res + * @param[in] codes flat PQ codes, one code per byte [n_vec, pq_dim] + * @param[in] pq_bits bit length of encoded vector elements + * @param[in] offset how many records to skip before writing the data into the list + * @param[in] list_data block to write into + */ +inline void pack( + raft::device_resources const& res, + device_matrix_view codes, + uint32_t pq_bits, + uint32_t offset, + device_mdspan::list_extents, row_major> list_data) +{ + ivf_pq::detail::pack_list_data(list_data, codes, offset, pq_bits, res.get_stream()); +} +} // namespace codepacker + +/** + * Write flat PQ codes into an existing list by the given offset. + * + * The list is identified by its label. + * + * NB: no memory allocation happens here; the list must fit the data (offset + n_vec). + * + * Usage example: + * @code{.cpp} + * // We will write into the 137th cluster + * uint32_t label = 137; + * // allocate the buffer for the input codes + * auto codes = raft::make_device_matrix(res, n_vec, index.pq_dim()); + * ... prepare n_vecs to pack into the list in codes ... + * // write codes into the list starting from the 42nd position + * ivf_pq::helpers::pack_list_data(res, &index, codes_to_pack, label, 42); + * @endcode + * + * @param[in] res + * @param[inout] index IVF-PQ index. + * @param[in] codes flat PQ codes, one code per byte [n_rows, pq_dim] + * @param[in] label The id of the list (cluster) into which we write. + * @param[in] offset how many records to skip before writing the data into the list + */ +template +void pack_list_data(raft::device_resources const& res, + index* index, + device_matrix_view codes, + uint32_t label, + uint32_t offset) +{ + ivf_pq::detail::pack_list_data(res, index, codes, label, offset); +} + +/** + * @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`, one code per byte (independently of pq_bits). + * + * Usage example: + * @code{.cpp} + * // We will unpack the fourth cluster + * uint32_t label = 3; + * // Get the list size + * uint32_t list_size = 0; + * raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, res.get_stream()); + * res.sync_stream(); + * // allocate the buffer for the output + * auto codes = raft::make_device_matrix(res, list_size, index.pq_dim()); + * // unpack the whole list + * ivf_pq::helpers::unpack_list_data(res, index, codes.view(), label, 0); + * @endcode + * + * @tparam IdxT type of the indices in the source dataset + * + * @param[in] res + * @param[in] index + * @param[out] out_codes + * the destination buffer [n_take, index.pq_dim()]. + * The length `n_take` defines how many records to unpack, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + * @param[in] offset + * How many records in the list to skip. + */ +template +void unpack_list_data(raft::device_resources const& res, + const index& index, + device_matrix_view out_codes, + uint32_t label, + uint32_t offset) +{ + return ivf_pq::detail::unpack_list_data(res, index, out_codes, label, offset); +} + +/** + * @brief Unpack a series of records of a single list (cluster) in the compressed index + * by their in-list offsets, one code per byte (independently of pq_bits). + * + * Usage example: + * @code{.cpp} + * // We will unpack the fourth cluster + * uint32_t label = 3; + * // Create the selection vector + * auto selected_indices = raft::make_device_vector(res, 4); + * ... fill the indices ... + * res.sync_stream(); + * // allocate the buffer for the output + * auto codes = raft::make_device_matrix(res, selected_indices.size(), index.pq_dim()); + * // decode the whole list + * ivf_pq::helpers::unpack_list_data( + * res, index, selected_indices.view(), codes.view(), label); + * @endcode + * + * @tparam IdxT type of the indices in the source dataset + * + * @param[in] res + * @param[in] index + * @param[in] in_cluster_indices + * The offsets of the selected indices within the cluster. + * @param[out] out_codes + * the destination buffer [n_take, index.pq_dim()]. + * The length `n_take` defines how many records to unpack, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + */ +template +void unpack_list_data(raft::device_resources const& res, + const index& index, + device_vector_view in_cluster_indices, + device_matrix_view out_codes, + uint32_t label) +{ + return ivf_pq::detail::unpack_list_data(res, index, out_codes, label, in_cluster_indices); +} + +/** + * @brief Decode `n_take` consecutive records of a single list (cluster) in the compressed index + * starting at given `offset`. + * + * Usage example: + * @code{.cpp} + * // We will reconstruct the fourth cluster + * uint32_t label = 3; + * // Get the list size + * uint32_t list_size = 0; + * raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, res.get_stream()); + * res.sync_stream(); + * // allocate the buffer for the output + * auto decoded_vectors = raft::make_device_matrix(res, list_size, index.dim()); + * // decode the whole list + * ivf_pq::helpers::reconstruct_list_data(res, index, decoded_vectors.view(), label, 0); + * @endcode + * + * @tparam T data element type + * @tparam IdxT type of the indices in the source dataset + * + * @param[in] res + * @param[in] index + * @param[out] out_vectors + * the destination buffer [n_take, index.dim()]. + * The length `n_take` defines how many records to reconstruct, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + * @param[in] offset + * How many records in the list to skip. + */ +template +void reconstruct_list_data(raft::device_resources const& res, + const index& index, + device_matrix_view out_vectors, + uint32_t label, + uint32_t offset) +{ + return ivf_pq::detail::reconstruct_list_data(res, index, out_vectors, label, offset); +} + +/** + * @brief Decode a series of records of a single list (cluster) in the compressed index + * by their in-list offsets. + * + * Usage example: + * @code{.cpp} + * // We will reconstruct the fourth cluster + * uint32_t label = 3; + * // Create the selection vector + * auto selected_indices = raft::make_device_vector(res, 4); + * ... fill the indices ... + * res.sync_stream(); + * // allocate the buffer for the output + * auto decoded_vectors = raft::make_device_matrix( + * res, selected_indices.size(), index.dim()); + * // decode the whole list + * ivf_pq::helpers::reconstruct_list_data( + * res, index, selected_indices.view(), decoded_vectors.view(), label); + * @endcode + * + * @tparam T data element type + * @tparam IdxT type of the indices in the source dataset + * + * @param[in] res + * @param[in] index + * @param[in] in_cluster_indices + * The offsets of the selected indices within the cluster. + * @param[out] out_vectors + * the destination buffer [n_take, index.dim()]. + * The length `n_take` defines how many records to reconstruct, + * it must be smaller than the list size. + * @param[in] label + * The id of the list (cluster) to decode. + */ +template +void reconstruct_list_data(raft::device_resources const& res, + const index& index, + device_vector_view in_cluster_indices, + device_matrix_view out_vectors, + uint32_t label) +{ + return ivf_pq::detail::reconstruct_list_data(res, index, out_vectors, label, in_cluster_indices); +} + +/** + * @brief Extend one list of the index in-place, by the list label, skipping the classification and + * encoding steps. + * + * Usage example: + * @code{.cpp} + * // We will extend the fourth cluster + * uint32_t label = 3; + * // We will fill 4 new vectors + * uint32_t n_vec = 4; + * // Indices of the new vectors + * auto indices = raft::make_device_vector(res, n_vec); + * ... fill the indices ... + * auto new_codes = raft::make_device_matrix new_codes( + * res, n_vec, index.pq_dim()); + * ... fill codes ... + * // extend list with new codes + * ivf_pq::helpers::extend_list_with_codes( + * res, &index, codes.view(), indices.view(), label); + * @endcode + * + * @tparam IdxT + * + * @param[in] res + * @param[inout] index + * @param[in] new_codes flat PQ codes, one code per byte [n_rows, index.pq_dim()] + * @param[in] new_indices source indices [n_rows] + * @param[in] label the id of the target list (cluster). + */ +template +void extend_list_with_codes(raft::device_resources const& res, + index* index, + device_matrix_view new_codes, + device_vector_view new_indices, + uint32_t label) +{ + ivf_pq::detail::extend_list_with_codes(res, index, new_codes, new_indices, label); +} + +/** + * @brief Extend one list of the index in-place, by the list label, skipping the classification + * step. + * + * Usage example: + * @code{.cpp} + * // We will extend the fourth cluster + * uint32_t label = 3; + * // We will extend with 4 new vectors + * uint32_t n_vec = 4; + * // Indices of the new vectors + * auto indices = raft::make_device_vector(res, n_vec); + * ... fill the indices ... + * auto new_vectors = raft::make_device_matrix new_codes( + * res, n_vec, index.dim()); + * ... fill vectors ... + * // extend list with new vectors + * ivf_pq::helpers::extend_list( + * res, &index, new_vectors.view(), indices.view(), label); + * @endcode + * + * @tparam T + * @tparam IdxT + * + * @param[in] res + * @param[inout] index + * @param[in] new_vectors data to encode [n_rows, index.dim()] + * @param[in] new_indices source indices [n_rows] + * @param[in] label the id of the target list (cluster). + * + */ +template +void extend_list(raft::device_resources const& res, + index* index, + device_matrix_view new_vectors, + device_vector_view new_indices, + uint32_t label) +{ + ivf_pq::detail::extend_list(res, index, new_vectors, new_indices, label); +} + +/** + * @brief Remove all data from a single list (cluster) in the index. + * + * Usage example: + * @code{.cpp} + * // We will erase the fourth cluster (label = 3) + * ivf_pq::helpers::erase_list(res, &index, 3); + * @endcode + * + * @tparam IdxT + * @param[in] res + * @param[inout] index + * @param[in] label the id of the target list (cluster). + */ +template +void erase_list(raft::device_resources const& res, index* index, uint32_t label) +{ + ivf_pq::detail::erase_list(res, index, label); +} + +/** @} */ +} // namespace raft::neighbors::ivf_pq::helpers diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index c69829821a..07efcb099e 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -22,7 +22,11 @@ #include #include +#include +#include +#include #include +#include #include #ifdef RAFT_COMPILED #include @@ -38,8 +42,6 @@ #include #include -#include -#include #include #include @@ -115,6 +117,33 @@ inline auto operator<<(std::ostream& os, const ivf_pq_inputs& p) -> std::ostream return os; } +template +void compare_vectors_l2( + const raft::device_resources& res, T a, T b, uint32_t label, double compression_ratio, double eps) +{ + auto n_rows = a.extent(0); + auto dim = a.extent(1); + rmm::mr::managed_memory_resource managed_memory; + auto dist = make_device_mdarray(res, &managed_memory, make_extents(n_rows)); + linalg::map_offset(res, dist.view(), [a, b, dim] __device__(uint32_t i) { + spatial::knn::detail::utils::mapping f{}; + double d = 0.0f; + for (uint32_t j = 0; j < dim; j++) { + double t = f(a(i, j)) - f(b(i, j)); + d += t * t; + } + return sqrt(d / double(dim)); + }); + res.sync_stream(); + for (uint32_t i = 0; i < n_rows; i++) { + double d = dist(i); + // The theoretical estimate of the error is hard to come up with, + // the estimate below is based on experimentation + curse of dimensionality + ASSERT_LE(d, 1.2 * eps * std::pow(2.0, compression_ratio)) + << " (label = " << label << ", ix = " << i << ", eps = " << eps << ")"; + } +} + template auto min_output_size(const raft::device_resources& handle, const ivf_pq::index& index, @@ -139,7 +168,6 @@ class ivf_pq_test : public ::testing::TestWithParam { { } - protected: void gen_data() { database.resize(size_t{ps.num_db_vecs} * size_t{ps.dim}, stream_); @@ -178,7 +206,7 @@ class ivf_pq_test : public ::testing::TestWithParam { handle_.sync_stream(stream_); } - index build_only() + auto build_only() { auto ipams = ps.index_params; ipams.add_data_on_build = true; @@ -188,19 +216,17 @@ class ivf_pq_test : public ::testing::TestWithParam { return ivf_pq::build(handle_, ipams, index_view); } - index build_2_extends() + auto build_2_extends() { - rmm::device_uvector db_indices(ps.num_db_vecs, stream_); - thrust::sequence(handle_.get_thrust_policy(), - thrust::device_pointer_cast(db_indices.data()), - thrust::device_pointer_cast(db_indices.data() + ps.num_db_vecs)); + auto db_indices = make_device_vector(handle_, ps.num_db_vecs); + linalg::map_offset(handle_, db_indices.view(), identity_op{}); handle_.sync_stream(stream_); auto size_1 = IdxT(ps.num_db_vecs) / 2; auto size_2 = IdxT(ps.num_db_vecs) - size_1; auto vecs_1 = database.data(); auto vecs_2 = database.data() + size_t(size_1) * size_t(ps.dim); - auto inds_1 = db_indices.data(); - auto inds_2 = db_indices.data() + size_t(size_1); + auto inds_1 = db_indices.data_handle(); + auto inds_2 = db_indices.data_handle() + size_t(size_1); auto ipams = ps.index_params; ipams.add_data_on_build = false; @@ -220,17 +246,160 @@ class ivf_pq_test : public ::testing::TestWithParam { return idx; } - index build_serialize() + auto build_serialize() { ivf_pq::serialize(handle_, "ivf_pq_index", build_only()); return ivf_pq::deserialize(handle_, "ivf_pq_index"); } + void check_reconstruction(const index& index, + double compression_ratio, + uint32_t label, + uint32_t n_take, + uint32_t n_skip) + { + auto& rec_list = index.lists()[label]; + auto dim = index.dim(); + n_take = std::min(n_take, rec_list->size.load()); + n_skip = std::min(n_skip, rec_list->size.load() - n_take); + + if (n_take == 0) { return; } + + auto rec_data = make_device_matrix(handle_, n_take, dim); + auto orig_data = make_device_matrix(handle_, n_take, dim); + + ivf_pq::helpers::reconstruct_list_data(handle_, index, rec_data.view(), label, n_skip); + + matrix::gather(database.data(), + IdxT{dim}, + IdxT{n_take}, + rec_list->indices.data_handle() + n_skip, + IdxT{n_take}, + orig_data.data_handle(), + stream_); + + compare_vectors_l2(handle_, rec_data.view(), orig_data.view(), label, compression_ratio, 0.06); + } + + void check_reconstruct_extend(index* index, double compression_ratio, uint32_t label) + { + // NB: this is not reference, the list is retained; the index will have to create a new list on + // `erase_list` op. + auto old_list = index->lists()[label]; + auto n_rows = old_list->size.load(); + if (n_rows == 0) { return; } + + auto vectors_1 = make_device_matrix(handle_, n_rows, index->dim()); + auto indices = make_device_vector(handle_, n_rows); + copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_); + + ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_1.view(), label, 0); + ivf_pq::helpers::erase_list(handle_, index, label); + // NB: passing the type parameter because const->non-const implicit conversion of the mdspans + // breaks type inference + ivf_pq::helpers::extend_list( + handle_, index, vectors_1.view(), indices.view(), label); + + auto& new_list = index->lists()[label]; + ASSERT_NE(old_list.get(), new_list.get()) + << "The old list should have been shared and retained after ivf_pq index has erased the " + "corresponding cluster."; + + auto vectors_2 = make_device_matrix(handle_, n_rows, index->dim()); + ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_2.view(), label, 0); + // The code search is unstable, and there's high chance of repeating values of the lvl-2 codes. + // Hence, encoding-decoding chain often leads to altering both the PQ codes and the + // reconstructed data. + compare_vectors_l2( + handle_, vectors_1.view(), vectors_2.view(), label, compression_ratio, 0.025); + } + + void check_packing(index* index, uint32_t label) + { + auto old_list = index->lists()[label]; + auto n_rows = old_list->size.load(); + + if (n_rows == 0) { return; } + + auto codes = make_device_matrix(handle_, n_rows, index->pq_dim()); + auto indices = make_device_vector(handle_, n_rows); + copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_); + + ivf_pq::helpers::unpack_list_data(handle_, *index, codes.view(), label, 0); + ivf_pq::helpers::erase_list(handle_, index, label); + ivf_pq::helpers::extend_list_with_codes( + handle_, index, codes.view(), indices.view(), label); + + auto& new_list = index->lists()[label]; + ASSERT_NE(old_list.get(), new_list.get()) + << "The old list should have been shared and retained after ivf_pq index has erased the " + "corresponding cluster."; + auto list_data_size = (n_rows / ivf_pq::kIndexGroupSize) * new_list->data.extent(1) * + new_list->data.extent(2) * new_list->data.extent(3); + + ASSERT_TRUE(old_list->data.size() >= list_data_size); + ASSERT_TRUE(new_list->data.size() >= list_data_size); + ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), + new_list->data.data_handle(), + list_data_size, + Compare{})); + + // Pack a few vectors back to the list. + int row_offset = 9; + int n_vec = 3; + ASSERT_TRUE(row_offset + n_vec < n_rows); + size_t offset = row_offset * index->pq_dim(); + auto codes_to_pack = make_device_matrix_view( + codes.data_handle() + offset, n_vec, index->pq_dim()); + ivf_pq::helpers::pack_list_data(handle_, index, codes_to_pack, label, row_offset); + ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), + new_list->data.data_handle(), + list_data_size, + Compare{})); + + // Another test with the API that take list_data directly + auto list_data = index->lists()[label]->data.view(); + uint32_t n_take = 4; + ASSERT_TRUE(row_offset + n_take < n_rows); + auto codes2 = raft::make_device_matrix(handle_, n_take, index->pq_dim()); + ivf_pq::helpers::codepacker::unpack( + handle_, list_data, index->pq_bits(), row_offset, codes2.view()); + + // Write it back + ivf_pq::helpers::codepacker::pack( + handle_, make_const_mdspan(codes2.view()), index->pq_bits(), row_offset, list_data); + ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), + new_list->data.data_handle(), + list_data_size, + Compare{})); + } + template void run(BuildIndex build_index) { index index = build_index(); + double compression_ratio = + static_cast(ps.dim * 8) / static_cast(index.pq_dim() * index.pq_bits()); + + for (uint32_t label = 0; label < index.n_lists(); label++) { + switch (label % 3) { + case 0: { + // Reconstruct and re-write vectors for one label + check_reconstruct_extend(&index, compression_ratio, label); + } break; + case 1: { + // Dump and re-write codes for one label + check_packing(&index, label); + } break; + default: { + // check a small subset of data in a randomly chosen cluster to see if the data + // reconstruction works well. + check_reconstruction(index, compression_ratio, label, 100, 7); + } + } + } + size_t queries_size = ps.num_queries * ps.k; std::vector indices_ivf_pq(queries_size); std::vector distances_ivf_pq(queries_size); @@ -255,11 +424,9 @@ class ivf_pq_test : public ::testing::TestWithParam { // A very conservative lower bound on recall double min_recall = static_cast(ps.search_params.n_probes) / static_cast(ps.index_params.n_lists); - double low_precision_factor = - static_cast(ps.dim * 8) / static_cast(index.pq_dim() * index.pq_bits()); // Using a heuristic to lower the required recall due to code-packing errors min_recall = - std::min(std::erfc(0.05 * low_precision_factor / std::max(min_recall, 0.5)), min_recall); + std::min(std::erfc(0.05 * compression_ratio / std::max(min_recall, 0.5)), min_recall); // Use explicit per-test min recall value if provided. min_recall = ps.min_recall.value_or(min_recall); @@ -269,7 +436,7 @@ class ivf_pq_test : public ::testing::TestWithParam { distances_ivf_pq, ps.num_queries, ps.k, - 0.0001 * low_precision_factor, + 0.0001 * compression_ratio, min_recall)) << ps; From 574f8f8819465a8d03653f3cd6f66c342544cc32 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 17 Apr 2023 14:19:51 -0700 Subject: [PATCH 15/78] Add python bindings for matrix::select_k (#1422) Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1422 --- cpp/CMakeLists.txt | 7 +- cpp/include/raft/matrix/select_k.cuh | 20 +-- cpp/include/raft_runtime/matrix/select_k.hpp | 32 +++++ .../raft_internal/matrix/select_k.cuh | 13 +- cpp/src/matrix/select_k_float_int64_t.cu | 37 +++++ python/pylibraft/CMakeLists.txt | 1 + .../pylibraft/pylibraft/matrix/CMakeLists.txt | 24 ++++ .../pylibraft/pylibraft/matrix/__init__.pxd | 14 ++ python/pylibraft/pylibraft/matrix/__init__.py | 18 +++ .../pylibraft/matrix/cpp/__init__.pxd | 0 .../pylibraft/matrix/cpp/__init__.py | 14 ++ .../pylibraft/matrix/cpp/select_k.pxd | 39 +++++ .../pylibraft/pylibraft/matrix/select_k.pyx | 133 ++++++++++++++++++ .../pylibraft/neighbors/brute_force.pyx | 3 +- ...test_brue_force.py => test_brute_force.py} | 0 .../pylibraft/pylibraft/test/test_doctests.py | 2 + .../pylibraft/pylibraft/test/test_select_k.py | 54 +++++++ 17 files changed, 389 insertions(+), 22 deletions(-) create mode 100644 cpp/include/raft_runtime/matrix/select_k.hpp create mode 100644 cpp/src/matrix/select_k_float_int64_t.cu create mode 100644 python/pylibraft/pylibraft/matrix/CMakeLists.txt create mode 100644 python/pylibraft/pylibraft/matrix/__init__.pxd create mode 100644 python/pylibraft/pylibraft/matrix/__init__.py create mode 100644 python/pylibraft/pylibraft/matrix/cpp/__init__.pxd create mode 100644 python/pylibraft/pylibraft/matrix/cpp/__init__.py create mode 100644 python/pylibraft/pylibraft/matrix/cpp/select_k.pxd create mode 100644 python/pylibraft/pylibraft/matrix/select_k.pyx rename python/pylibraft/pylibraft/test/{test_brue_force.py => test_brute_force.py} (100%) create mode 100644 python/pylibraft/pylibraft/test/test_select_k.py diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6461492169..62f9ac604e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -70,13 +70,11 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations" ${RAFT_COMPILE_LIBRARY_DEFAULT} ) - -# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs -# to have different values for the `Threads::Threads` target. Setting this flag ensures +# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to +# have different values for the `Threads::Threads` target. Setting this flag ensures # `Threads::Threads` is the same value across all builds so that cache hits occur set(THREADS_PREFER_PTHREAD_FLAG ON) - include(CMakeDependentOption) # cmake_dependent_option( RAFT_USE_FAISS_STATIC "Build and statically link the FAISS library for # nearest neighbors search on GPU" ON RAFT_COMPILE_LIBRARY OFF ) @@ -329,6 +327,7 @@ if(RAFT_COMPILE_LIBRARY) src/distance/specializations/fused_l2_nn_double_int64.cu src/distance/specializations/fused_l2_nn_float_int.cu src/distance/specializations/fused_l2_nn_float_int64.cu + src/matrix/select_k_float_int64_t.cu src/matrix/specializations/detail/select_k_float_uint32_t.cu src/matrix/specializations/detail/select_k_float_int64_t.cu src/matrix/specializations/detail/select_k_half_uint32_t.cu diff --git a/cpp/include/raft/matrix/select_k.cuh b/cpp/include/raft/matrix/select_k.cuh index 9a1a14fd73..7951cbdb03 100644 --- a/cpp/include/raft/matrix/select_k.cuh +++ b/cpp/include/raft/matrix/select_k.cuh @@ -42,13 +42,13 @@ namespace raft::matrix { * @code{.cpp} * using namespace raft; * // get a 2D row-major array of values to search through - * auto in_values = {... input device_matrix_view ...} + * auto in_values = {... input device_matrix_view ...} * // prepare output arrays - * auto out_extents = make_extents(in_values.extent(0), k); + * auto out_extents = make_extents(in_values.extent(0), k); * auto out_values = make_device_mdarray(handle, out_extents); - * auto out_indices = make_device_mdarray(handle, out_extents); + * auto out_indices = make_device_mdarray(handle, out_extents); * // search `k` smallest values in each row - * matrix::select_k( + * matrix::select_k( * handle, in_values, std::nullopt, out_values.view(), out_indices.view(), true); * @endcode * @@ -76,13 +76,13 @@ namespace raft::matrix { */ template void select_k(const device_resources& handle, - raft::device_matrix_view in_val, - std::optional> in_idx, - raft::device_matrix_view out_val, - raft::device_matrix_view out_idx, + raft::device_matrix_view in_val, + std::optional> in_idx, + raft::device_matrix_view out_val, + raft::device_matrix_view out_idx, bool select_min) { - RAFT_EXPECTS(out_val.extent(1) <= size_t(std::numeric_limits::max()), + RAFT_EXPECTS(out_val.extent(1) <= int64_t(std::numeric_limits::max()), "output k must fit the int type."); auto batch_size = in_val.extent(0); auto len = in_val.extent(1); @@ -93,7 +93,7 @@ void select_k(const device_resources& handle, RAFT_EXPECTS(batch_size == in_idx->extent(0), "batch sizes must be equal"); RAFT_EXPECTS(len == in_idx->extent(1), "value and index input lengths must be equal"); } - RAFT_EXPECTS(size_t(k) == out_idx.extent(1), "value and index output lengths must be equal"); + RAFT_EXPECTS(int64_t(k) == out_idx.extent(1), "value and index output lengths must be equal"); return detail::select_k(in_val.data_handle(), in_idx.has_value() ? in_idx->data_handle() : nullptr, batch_size, diff --git a/cpp/include/raft_runtime/matrix/select_k.hpp b/cpp/include/raft_runtime/matrix/select_k.hpp new file mode 100644 index 0000000000..08c0e01d0a --- /dev/null +++ b/cpp/include/raft_runtime/matrix/select_k.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +namespace raft::runtime::matrix { +void select_k(const device_resources& handle, + raft::device_matrix_view in_val, + std::optional> in_idx, + raft::device_matrix_view out_val, + raft::device_matrix_view out_idx, + bool select_min); + +} // namespace raft::runtime::matrix diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh index 188122c9b4..a3535f8ffd 100644 --- a/cpp/internal/raft_internal/matrix/select_k.cuh +++ b/cpp/internal/raft_internal/matrix/select_k.cuh @@ -91,12 +91,13 @@ void select_k_impl(const device_resources& handle, auto stream = handle.get_stream(); switch (algo) { case Algo::kPublicApi: { - auto in_extent = make_extents(batch_size, len); - auto out_extent = make_extents(batch_size, k); - auto in_span = make_mdspan(in, in_extent); - auto in_idx_span = make_mdspan(in_idx, in_extent); - auto out_span = make_mdspan(out, out_extent); - auto out_idx_span = make_mdspan(out_idx, out_extent); + auto in_extent = make_extents(batch_size, len); + auto out_extent = make_extents(batch_size, k); + auto in_span = make_mdspan(in, in_extent); + auto in_idx_span = + make_mdspan(in_idx, in_extent); + auto out_span = make_mdspan(out, out_extent); + auto out_idx_span = make_mdspan(out_idx, out_extent); if (in_idx == nullptr) { // NB: std::nullopt prevents automatic inference of the template parameters. return matrix::select_k( diff --git a/cpp/src/matrix/select_k_float_int64_t.cu b/cpp/src/matrix/select_k_float_int64_t.cu new file mode 100644 index 0000000000..309ac50c6b --- /dev/null +++ b/cpp/src/matrix/select_k_float_int64_t.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include + +namespace raft::runtime::matrix { + +void select_k(const device_resources& handle, + raft::device_matrix_view in_val, + std::optional> in_idx, + raft::device_matrix_view out_val, + raft::device_matrix_view out_idx, + bool select_min) +{ + raft::matrix::select_k(handle, in_val, in_idx, out_val, out_idx, select_min); +} +} // namespace raft::runtime::matrix diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index 349a2b08ba..069bd98222 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -86,6 +86,7 @@ rapids_cython_init() add_subdirectory(pylibraft/common) add_subdirectory(pylibraft/distance) +add_subdirectory(pylibraft/matrix) add_subdirectory(pylibraft/neighbors) add_subdirectory(pylibraft/random) add_subdirectory(pylibraft/cluster) diff --git a/python/pylibraft/pylibraft/matrix/CMakeLists.txt b/python/pylibraft/pylibraft/matrix/CMakeLists.txt new file mode 100644 index 0000000000..ffba10dea9 --- /dev/null +++ b/python/pylibraft/pylibraft/matrix/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources select_k.pyx) +set(linked_libraries raft::raft raft::compiled) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS raft MODULE_PREFIX matrix_ +) diff --git a/python/pylibraft/pylibraft/matrix/__init__.pxd b/python/pylibraft/pylibraft/matrix/__init__.pxd new file mode 100644 index 0000000000..a7e7b75096 --- /dev/null +++ b/python/pylibraft/pylibraft/matrix/__init__.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/matrix/__init__.py b/python/pylibraft/pylibraft/matrix/__init__.py new file mode 100644 index 0000000000..5eb35795ed --- /dev/null +++ b/python/pylibraft/pylibraft/matrix/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .select_k import select_k + +__all__ = ["select_k"] diff --git a/python/pylibraft/pylibraft/matrix/cpp/__init__.pxd b/python/pylibraft/pylibraft/matrix/cpp/__init__.pxd new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/pylibraft/pylibraft/matrix/cpp/__init__.py b/python/pylibraft/pylibraft/matrix/cpp/__init__.py new file mode 100644 index 0000000000..8f2cc34855 --- /dev/null +++ b/python/pylibraft/pylibraft/matrix/cpp/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/matrix/cpp/select_k.pxd b/python/pylibraft/pylibraft/matrix/cpp/select_k.pxd new file mode 100644 index 0000000000..ab466fdce6 --- /dev/null +++ b/python/pylibraft/pylibraft/matrix/cpp/select_k.pxd @@ -0,0 +1,39 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from libc.stdint cimport int64_t +from libcpp cimport bool + +from pylibraft.common.cpp.mdspan cimport device_matrix_view, row_major +from pylibraft.common.cpp.optional cimport optional +from pylibraft.common.handle cimport device_resources + + +cdef extern from "raft_runtime/matrix/select_k.hpp" \ + namespace "raft::runtime::matrix" nogil: + + cdef void select_k(const device_resources & handle, + device_matrix_view[float, int64_t, row_major], + optional[device_matrix_view[int64_t, + int64_t, + row_major]], + device_matrix_view[float, int64_t, row_major], + device_matrix_view[int64_t, int64_t, row_major], + bool) except + diff --git a/python/pylibraft/pylibraft/matrix/select_k.pyx b/python/pylibraft/pylibraft/matrix/select_k.pyx new file mode 100644 index 0000000000..fbb1e2e5d3 --- /dev/null +++ b/python/pylibraft/pylibraft/matrix/select_k.pyx @@ -0,0 +1,133 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cython.operator cimport dereference as deref +from libc.stdint cimport int64_t +from libcpp cimport bool + +import numpy as np + +from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray +from pylibraft.common.handle import auto_sync_handle +from pylibraft.common.input_validation import is_c_contiguous + +from pylibraft.common.cpp.mdspan cimport ( + device_matrix_view, + host_matrix_view, + make_device_matrix_view, + make_host_matrix_view, + row_major, +) +from pylibraft.common.cpp.optional cimport optional +from pylibraft.common.handle cimport device_resources +from pylibraft.common.mdspan cimport get_dmv_float, get_dmv_int64 +from pylibraft.matrix.cpp.select_k cimport select_k as c_select_k + + +@auto_sync_handle +@auto_convert_output +def select_k(dataset, k=None, distances=None, indices=None, select_min=True, + handle=None): + """ + Selects the top k items from each row in a matrix + + + Parameters + ---------- + dataset : array interface compliant matrix, row-major layout, + shape (n_rows, dim). Supported dtype [float] + k : int + Number of items to return for each row. Optional if indices or + distances arrays are given (in which case their second dimension + is k). + distances : Optional array interface compliant matrix shape + (n_rows, k), dtype float. If supplied, + distances will be written here in-place. (default None) + indices : Optional array interface compliant matrix shape + (n_rows, k), dtype int64_t. If supplied, neighbor + indices will be written here in-place. (default None) + select_min: : bool + Whether to select the minimum or maximum K items + + {handle_docstring} + + Returns + ------- + distances: array interface compliant object containing resulting distances + shape (n_rows, k) + + indices: array interface compliant object containing resulting indices + shape (n_rows, k) + + Examples + -------- + + >>> import cupy as cp + + >>> from pylibraft.matrix import select_k + + >>> n_features = 50 + >>> n_rows = 1000 + + >>> queries = cp.random.random_sample((n_rows, n_features), + ... dtype=cp.float32) + >>> k = 40 + >>> distances, ids = select_k(queries, k) + >>> distances = cp.asarray(distances) + >>> ids = cp.asarray(ids) + """ + + dataset_cai = cai_wrapper(dataset) + + if k is None: + if indices is not None: + k = cai_wrapper(indices).shape[1] + elif distances is not None: + k = cai_wrapper(distances).shape[1] + else: + raise ValueError("Argument k must be specified if both indices " + "and distances arg is None") + + n_rows = dataset.shape[0] + if indices is None: + indices = device_ndarray.empty((n_rows, k), dtype='int64') + + if distances is None: + distances = device_ndarray.empty((n_rows, k), dtype='float32') + + distances_cai = cai_wrapper(distances) + indices_cai = cai_wrapper(indices) + + cdef device_resources* handle_ = \ + handle.getHandle() + + cdef optional[device_matrix_view[int64_t, int64_t, row_major]] in_idx + + if dataset_cai.dtype == np.float32: + c_select_k(deref(handle_), + get_dmv_float(dataset_cai, check_shape=True), + in_idx, + get_dmv_float(distances_cai, check_shape=True), + get_dmv_int64(indices_cai, check_shape=True), + select_min) + else: + raise TypeError("dtype %s not supported" % dataset_cai.dtype) + + return distances, indices diff --git a/python/pylibraft/pylibraft/neighbors/brute_force.pyx b/python/pylibraft/pylibraft/neighbors/brute_force.pyx index dbd888756d..8836307a5a 100644 --- a/python/pylibraft/pylibraft/neighbors/brute_force.pyx +++ b/python/pylibraft/pylibraft/neighbors/brute_force.pyx @@ -40,7 +40,6 @@ from pylibraft.common.handle cimport device_resources from pylibraft.common.mdspan cimport get_dmv_float, get_dmv_int64 from pylibraft.common.handle import auto_sync_handle -from pylibraft.common.input_validation import is_c_contiguous from pylibraft.common.interruptible import cuda_interruptible from pylibraft.distance.distance_type cimport DistanceType @@ -144,7 +143,7 @@ def knn(dataset, queries, k=None, indices=None, distances=None, raise ValueError("Argument k must be specified if both indices " "and distances arg is None") - n_queries = cai_wrapper(queries).shape[0] + n_queries = queries_cai.shape[0] if indices is None: indices = device_ndarray.empty((n_queries, k), dtype='int64') diff --git a/python/pylibraft/pylibraft/test/test_brue_force.py b/python/pylibraft/pylibraft/test/test_brute_force.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_brue_force.py rename to python/pylibraft/pylibraft/test/test_brute_force.py diff --git a/python/pylibraft/pylibraft/test/test_doctests.py b/python/pylibraft/pylibraft/test/test_doctests.py index 34be6c55f5..19e5c5c22f 100644 --- a/python/pylibraft/pylibraft/test/test_doctests.py +++ b/python/pylibraft/pylibraft/test/test_doctests.py @@ -22,6 +22,7 @@ import pylibraft.cluster import pylibraft.distance +import pylibraft.matrix import pylibraft.neighbors import pylibraft.random @@ -94,6 +95,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): DOC_STRINGS = list(_find_doctests_in_obj(pylibraft.cluster)) DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.common)) DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.distance)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.matrix.select_k)) DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors)) DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_pq)) DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.brute_force)) diff --git a/python/pylibraft/pylibraft/test/test_select_k.py b/python/pylibraft/pylibraft/test/test_select_k.py new file mode 100644 index 0000000000..203e735b9c --- /dev/null +++ b/python/pylibraft/pylibraft/test/test_select_k.py @@ -0,0 +1,54 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest + +from pylibraft.common import device_ndarray +from pylibraft.matrix import select_k + + +@pytest.mark.parametrize("n_rows", [32, 100]) +@pytest.mark.parametrize("n_cols", [40, 100]) +@pytest.mark.parametrize("k", [1, 5, 16, 35]) +@pytest.mark.parametrize("inplace", [True, False]) +def test_select_k(n_rows, n_cols, k, inplace): + dataset = np.random.random_sample((n_rows, n_cols)).astype("float32") + dataset_device = device_ndarray(dataset) + + indices = np.zeros((n_rows, k), dtype="int64") + distances = np.zeros((n_rows, k), dtype="float32") + indices_device = device_ndarray(indices) + distances_device = device_ndarray(distances) + + ret_distances, ret_indices = select_k( + dataset_device, + k=k, + distances=distances_device, + indices=indices_device, + ) + + distances_device = ret_distances if not inplace else distances_device + actual_distances = distances_device.copy_to_host() + argsort = np.argsort(dataset, axis=1) + + for i in range(dataset.shape[0]): + expected_indices = argsort[i] + gpu_dists = actual_distances[i] + + cpu_ordered = dataset[i, expected_indices] + np.testing.assert_allclose( + cpu_ordered[:k], gpu_dists, atol=1e-4, rtol=1e-4 + ) From ff58b8bbbf5ecdd7b78a628191ca0a15d3d62767 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 18 Apr 2023 17:06:50 +0200 Subject: [PATCH 16/78] Fix dim param for IVF-PQ wrapper in ANN bench (#1427) The `index_` is not yet initialized. To construct the dataset view, we need to use the `dim_` variable which was set in the constructor. Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1427 --- cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h index 70dff81847..517272e6cf 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h @@ -42,6 +42,7 @@ template class RaftIvfPQ : public ANN { public: using typename ANN::AnnSearchParam; + using ANN::dim_; struct SearchParam : public AnnSearchParam { raft::neighbors::ivf_pq::search_params pq_param; @@ -118,7 +119,7 @@ void RaftIvfPQ::load(const std::string& file) template void RaftIvfPQ::build(const T* dataset, size_t nrow, cudaStream_t) { - auto dataset_v = raft::make_device_matrix_view(dataset, IdxT(nrow), index_->dim()); + auto dataset_v = raft::make_device_matrix_view(dataset, IdxT(nrow), dim_); index_.emplace(raft::runtime::neighbors::ivf_pq::build(handle_, index_params_, dataset_v)); return; From 5d68c5742be5d895f5650f42bdd3c27348ec56eb Mon Sep 17 00:00:00 2001 From: Sevag H Date: Tue, 18 Apr 2023 12:14:52 -0400 Subject: [PATCH 17/78] Remove wheel pytest verbosity (#1424) This PR removes the verbose flag from wheel pytest commands Authors: - Sevag H (https://github.com/sevagh) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/raft/pull/1424 --- .github/workflows/pr.yaml | 4 ++-- .github/workflows/test.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index fc8c8d516e..c51d5c0a34 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -83,7 +83,7 @@ jobs: test-before-amd64: "pip install cupy-cuda11x" # On arm also need to install cupy from the specific webpage. test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "python -m pytest -v ./python/pylibraft/pylibraft/test" + test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test" test-smoketest: "python ./ci/wheel_smoke_test_pylibraft.py" wheel-build-raft-dask: needs: wheel-tests-pylibraft @@ -105,5 +105,5 @@ jobs: # Always want to test against latest dask/distributed. test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" - test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test" + test-unittest: "python -m pytest ./python/raft-dask/raft_dask/test" test-smoketest: "python ./ci/wheel_smoke_test_raft_dask.py" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index dc8f7b6f2b..05e96a6dff 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,7 +41,7 @@ jobs: package-name: pylibraft test-before-amd64: "pip install cupy-cuda11x" test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "python -m pytest -v ./python/pylibraft/pylibraft/test" + test-unittest: "python -m pytest ./python/pylibraft/pylibraft/test" wheel-tests-raft-dask: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.06 @@ -53,4 +53,4 @@ jobs: package-name: raft_dask test-before-amd64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" test-before-arm64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" - test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test" + test-unittest: "python -m pytest ./python/raft-dask/raft_dask/test" From b1939564ed3d38095efe4cdc3049c9acf05624fd Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 18 Apr 2023 14:10:12 -0400 Subject: [PATCH 18/78] Removing cuda stream view include from mdarray (#1429) cc @wphicks Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/1429 --- cpp/include/raft/core/mdarray.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp index 61c1b500e6..88f90485dd 100644 --- a/cpp/include/raft/core/mdarray.hpp +++ b/cpp/include/raft/core/mdarray.hpp @@ -29,7 +29,6 @@ #include #include #include -#include namespace raft { /** @@ -45,11 +44,11 @@ namespace raft { template class array_interface { /** - * @brief Get a mdspan that can be passed down to CUDA kernels. + * @brief Get an mdspan */ auto view() noexcept { return static_cast(this)->view(); } /** - * @brief Get a mdspan that can be passed down to CUDA kernels. + * @brief Get an mdspan */ auto view() const noexcept { return static_cast(this)->view(); } }; @@ -108,7 +107,8 @@ inline constexpr bool is_array_interface_v = is_array_interface::value; * template. * * - Most of the constructors from the reference implementation is removed to make sure - * CUDA stream is honorred. + * CUDA stream is honored. Note that this class is not coupled to CUDA and therefore + * will only be used in the case where the device variant is used. * * - unique_size is not implemented, which is still working in progress in the proposal * @@ -220,11 +220,11 @@ class mdarray #undef RAFT_MDARRAY_CTOR_CONSTEXPR /** - * @brief Get a mdspan that can be passed down to CUDA kernels. + * @brief Get an mdspan */ auto view() noexcept { return view_type(c_.data(), map_, cp_.make_accessor_policy()); } /** - * @brief Get a mdspan that can be passed down to CUDA kernels. + * @brief Get an mdspan */ auto view() const noexcept { From 6b021f5a562a52a0488c8393f38a7c50af81ba18 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 19 Apr 2023 12:50:13 -0700 Subject: [PATCH 19/78] Use nvtx3 includes. (#1431) This PR updates raft to use `#include ` instead of `#include `. This ensures we fetch the header-only NVTX v3. See NVTX docs for more information: https://nvidia.github.io/NVTX/#c-and-c Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/raft/pull/1431 --- cpp/bench/ann/src/common/benchmark.hpp | 2 +- cpp/include/raft/core/detail/nvtx.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index b4d8fbeee3..c34b95010f 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -14,7 +14,7 @@ * limitations under the License. */ #ifdef NVTX -#include +#include #endif #include diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp index 4a16ec81bd..adbf3a3666 100644 --- a/cpp/include/raft/core/detail/nvtx.hpp +++ b/cpp/include/raft/core/detail/nvtx.hpp @@ -25,7 +25,7 @@ namespace raft::common::nvtx::detail { #include #include #include -#include +#include #include #include #include From fa51c47f075d78526467f947aa5dc00b781e391a Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 19 Apr 2023 14:56:48 -0700 Subject: [PATCH 20/78] Remove MetricProcessor code from brute_force::knn (#1426) Stop using the MetricProcessor code to preprocess the inputs to the bfknn calls. Since the pairwise distance API supports both cosine and correlation distance, this wasn't required anymore - and it introduced NaN values to the input when passed a dataset with one of the rows being all zero. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1426 --- .../raft/neighbors/detail/knn_brute_force.cuh | 91 ++++++++++--------- .../pylibraft/test/test_brute_force.py | 3 - 2 files changed, 49 insertions(+), 45 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh index a776ce2586..b3c4818e70 100644 --- a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh +++ b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh @@ -56,7 +56,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle, size_t m, size_t n, size_t d, - int k, + size_t k, ElementType* distances, // size (m, k) IndexType* indices, // size (m, k) raft::distance::DistanceType metric, @@ -79,7 +79,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle, if (max_col_tile_size && (tile_cols > max_col_tile_size)) { tile_cols = max_col_tile_size; } // tile_cols must be at least k items - tile_cols = std::max(tile_cols, static_cast(k)); + tile_cols = std::max(tile_cols, k); // stores pairwise distances for the current tile rmm::device_uvector temp_distances(tile_rows * tile_cols, stream); @@ -90,13 +90,34 @@ void tiled_brute_force_knn(const raft::device_resources& handle, rmm::device_uvector search_norms(0, stream); rmm::device_uvector index_norms(0, stream); if (metric == raft::distance::DistanceType::L2Expanded || - metric == raft::distance::DistanceType::L2SqrtExpanded) { + metric == raft::distance::DistanceType::L2SqrtExpanded || + metric == raft::distance::DistanceType::CosineExpanded) { search_norms.resize(m, stream); index_norms.resize(n, stream); - raft::linalg::rowNorm( - search_norms.data(), search, d, m, raft::linalg::NormType::L2Norm, true, stream); - raft::linalg::rowNorm( - index_norms.data(), index, d, n, raft::linalg::NormType::L2Norm, true, stream); + // cosine needs the l2norm, where as l2 distances needs the squared norm + if (metric == raft::distance::DistanceType::CosineExpanded) { + raft::linalg::rowNorm(search_norms.data(), + search, + d, + m, + raft::linalg::NormType::L2Norm, + true, + stream, + raft::sqrt_op{}); + raft::linalg::rowNorm(index_norms.data(), + index, + d, + n, + raft::linalg::NormType::L2Norm, + true, + stream, + raft::sqrt_op{}); + } else { + raft::linalg::rowNorm( + search_norms.data(), search, d, m, raft::linalg::NormType::L2Norm, true, stream); + raft::linalg::rowNorm( + index_norms.data(), index, d, n, raft::linalg::NormType::L2Norm, true, stream); + } pairwise_metric = raft::distance::DistanceType::InnerProduct; } @@ -109,20 +130,17 @@ void tiled_brute_force_knn(const raft::device_resources& handle, // in which case the number of columns here is too high in the temp output. // adjust if necessary auto last_col_tile_size = n % tile_cols; - if (last_col_tile_size && (last_col_tile_size < static_cast(k))) { - temp_out_cols -= k - last_col_tile_size; - } + if (last_col_tile_size && (last_col_tile_size < k)) { temp_out_cols -= k - last_col_tile_size; } // if we have less than k items in the index, we should fill out the result // to indicate that we are missing items (and match behaviour in faiss) - if (n < static_cast(k)) { + if (n < k) { raft::matrix::fill(handle, - raft::make_device_matrix_view(distances, m, static_cast(k)), + raft::make_device_matrix_view(distances, m, k), std::numeric_limits::lowest()); if constexpr (std::is_signed_v) { - raft::matrix::fill( - handle, raft::make_device_matrix_view(indices, m, static_cast(k)), IndexType{-1}); + raft::matrix::fill(handle, raft::make_device_matrix_view(indices, m, k), IndexType{-1}); } } @@ -136,7 +154,7 @@ void tiled_brute_force_knn(const raft::device_resources& handle, for (size_t j = 0; j < n; j += tile_cols) { size_t current_centroid_size = std::min(tile_cols, n - j); - size_t current_k = std::min(current_centroid_size, static_cast(k)); + size_t current_k = std::min(current_centroid_size, k); // calculate the top-k elements for the current tile, by calculating the // full pairwise distance for the tile - and then selecting the top-k from that @@ -176,6 +194,21 @@ void tiled_brute_force_knn(const raft::device_resources& handle, val = distance_epilogue(val, row, col); return val; }); + } else if (metric == raft::distance::DistanceType::CosineExpanded) { + auto row_norms = search_norms.data(); + auto col_norms = index_norms.data(); + auto dist = temp_distances.data(); + + raft::linalg::map_offset( + handle, + raft::make_device_vector_view(dist, current_query_size * current_centroid_size), + [=] __device__(IndexType idx) { + IndexType row = i + (idx / current_centroid_size); + IndexType col = j + (idx % current_centroid_size); + auto val = 1.0 - dist[idx] / (row_norms[row] * col_norms[col]); + val = distance_epilogue(val, row, col); + return val; + }); } else { // if we're not l2 distance, and we have a distance epilogue - run it now if constexpr (!std::is_same_v) { @@ -310,18 +343,6 @@ void brute_force_knn_impl( id_ranges = translations; } - // perform preprocessing - std::unique_ptr> query_metric_processor = - create_processor(metric, n, D, k, rowMajorQuery, userStream); - query_metric_processor->preprocess(search_items); - - std::vector>> metric_processors(input.size()); - for (size_t i = 0; i < input.size(); i++) { - metric_processors[i] = - create_processor(metric, sizes[i], D, k, rowMajorQuery, userStream); - metric_processors[i]->preprocess(input[i]); - } - int device; RAFT_CUDA_TRY(cudaGetDevice(&device)); @@ -430,14 +451,6 @@ void brute_force_knn_impl( raft::linalg::transpose(handle, input[i], index, sizes[i], D, stream); } - // cosine/correlation are handled by metric processor, use IP distance - // for brute force knn call. - auto tiled_metric = metric; - if (metric == raft::distance::DistanceType::CosineExpanded || - metric == raft::distance::DistanceType::CorrelationExpanded) { - tiled_metric = raft::distance::DistanceType::InnerProduct; - } - tiled_brute_force_knn(stream_pool_handle, search, index, @@ -447,7 +460,7 @@ void brute_force_knn_impl( k, out_d_ptr, out_i_ptr, - tiled_metric, + metric, metricArg, 0, 0, @@ -470,12 +483,6 @@ void brute_force_knn_impl( knn_merge_parts(out_D, out_I, res_D, res_I, n, input.size(), k, userStream, trans.data()); } - query_metric_processor->revert(search_items); - query_metric_processor->postprocess(out_D); - for (size_t i = 0; i < input.size(); i++) { - metric_processors[i]->revert(input[i]); - } - if (translations == nullptr) delete id_ranges; }; diff --git a/python/pylibraft/pylibraft/test/test_brute_force.py b/python/pylibraft/pylibraft/test/test_brute_force.py index f349be892d..0bd5e6eaaf 100644 --- a/python/pylibraft/pylibraft/test/test_brute_force.py +++ b/python/pylibraft/pylibraft/test/test_brute_force.py @@ -90,9 +90,6 @@ def test_knn( expected_indices = argsort[i] gpu_dists = actual_distances[i] - if metric == "correlation" or metric == "cosine": - gpu_dists = gpu_dists[::-1] - cpu_ordered = pw_dists[i, expected_indices] np.testing.assert_allclose( cpu_ordered[:k], gpu_dists, atol=1e-4, rtol=1e-4 From 6105f0e4d326f673447efb9576cc8adc0d1f9caa Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 19 Apr 2023 18:41:14 -0400 Subject: [PATCH 21/78] Minor Updates to Sparse Structures (#1432) Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1432 --- cpp/include/raft/core/coo_matrix.hpp | 35 ++++++++++++-------- cpp/include/raft/core/csr_matrix.hpp | 26 +++++++++------ cpp/include/raft/core/device_coo_matrix.hpp | 36 +++++++++------------ cpp/include/raft/core/device_csr_matrix.hpp | 31 ++++++++---------- cpp/include/raft/core/host_coo_matrix.hpp | 33 +++++++++---------- cpp/include/raft/core/host_csr_matrix.hpp | 33 +++++++++---------- cpp/include/raft/core/sparse_types.hpp | 17 +++++----- 7 files changed, 109 insertions(+), 102 deletions(-) diff --git a/cpp/include/raft/core/coo_matrix.hpp b/cpp/include/raft/core/coo_matrix.hpp index efab8a1601..a5f7c05493 100644 --- a/cpp/include/raft/core/coo_matrix.hpp +++ b/cpp/include/raft/core/coo_matrix.hpp @@ -71,12 +71,6 @@ class coordinate_structure_view { } - /** - * Create a view from this view. Note that this is for interface compatibility - * @return - */ - view_type view() { return view_type(rows_, cols_, this->get_n_rows(), this->get_n_cols()); } - /** * Return span containing underlying rows array * @return span containing underlying rows array @@ -209,6 +203,10 @@ class coo_matrix_view coordinate_structure_view, is_device> { public: + using element_type = ElementType; + using row_type = RowType; + using col_type = ColType; + using nnz_type = NZType; coo_matrix_view(raft::span element_span, coordinate_structure_view structure_view) : sparse_matrix_view { public: using element_type = ElementType; + using row_type = RowType; + using col_type = ColType; + using nnz_type = NZType; using structure_view_type = typename structure_type::view_type; using container_type = typename ContainerPolicy::container_type; using sparse_matrix_type = @@ -258,14 +259,9 @@ class coo_matrix // Constructor that owns the data but not the structure template > - coo_matrix(raft::resources const& handle, std::shared_ptr structure) noexcept( + coo_matrix(raft::resources const& handle, structure_type structure) noexcept( std::is_nothrow_default_constructible_v) : sparse_matrix_type(handle, structure){}; - /** - * Return a view of the structure underlying this matrix - * @return - */ - structure_view_type structure_view() { return this->structure_.get()->view(); } /** * Initialize the sparsity on this instance if it was not known upon construction @@ -277,7 +273,20 @@ class coo_matrix void initialize_sparsity(NZType nnz) { sparse_matrix_type::initialize_sparsity(nnz); - this->structure_.get()->initialize_sparsity(nnz); + this->structure_.initialize_sparsity(nnz); + } + + /** + * Return a view of the structure underlying this matrix + * @return + */ + structure_view_type structure_view() + { + if constexpr (get_sparsity_type() == SparsityType::OWNING) { + return this->structure_.view(); + } else { + return this->structure_; + } } }; } // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/core/csr_matrix.hpp b/cpp/include/raft/core/csr_matrix.hpp index fac656b3f9..c37cfa41c8 100644 --- a/cpp/include/raft/core/csr_matrix.hpp +++ b/cpp/include/raft/core/csr_matrix.hpp @@ -87,12 +87,6 @@ class compressed_structure_view */ span get_indices() override { return indices_; } - /** - * Create a view from this view. Note that this is for interface compatibility - * @return - */ - view_type view() { return view_type(indptr_, indices_, this->get_n_cols()); } - protected: raft::span indptr_; raft::span indices_; @@ -221,6 +215,10 @@ class csr_matrix_view compressed_structure_view, is_device> { public: + using element_type = ElementType; + using indptr_type = IndptrType; + using indices_type = IndicesType; + using nnz_type = NZType; csr_matrix_view( raft::span element_span, compressed_structure_view structure_view) @@ -249,6 +247,9 @@ class csr_matrix ContainerPolicy> { public: using element_type = ElementType; + using indptr_type = IndptrType; + using indices_type = IndicesType; + using nnz_type = NZType; using structure_view_type = typename structure_type::view_type; static constexpr auto get_sparsity_type() { return sparsity_type; } using sparse_matrix_type = @@ -271,7 +272,7 @@ class csr_matrix template > - csr_matrix(raft::resources const& handle, std::shared_ptr structure) noexcept( + csr_matrix(raft::resources const& handle, structure_type structure) noexcept( std::is_nothrow_default_constructible_v) : sparse_matrix_type(handle, structure){}; @@ -284,13 +285,20 @@ class csr_matrix void initialize_sparsity(NZType nnz) { sparse_matrix_type::initialize_sparsity(nnz); - this->structure_.get()->initialize_sparsity(nnz); + this->structure_.initialize_sparsity(nnz); } /** * Return a view of the structure underlying this matrix * @return */ - structure_view_type structure_view() { return this->structure_.get()->view(); } + structure_view_type structure_view() + { + if constexpr (get_sparsity_type() == SparsityType::OWNING) { + return this->structure_.view(); + } else { + return this->structure_; + } + } }; } // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp index b1e9ca30fc..35be67431d 100644 --- a/cpp/include/raft/core/device_coo_matrix.hpp +++ b/cpp/include/raft/core/device_coo_matrix.hpp @@ -174,16 +174,15 @@ auto make_device_coo_matrix(raft::resources const& handle, * @tparam ColType * @tparam NZType * @param[in] handle raft handle for managing expensive device resources - * @param[in] structure_ a sparsity-preserving coordinate structural view + * @param[in] structure a sparsity-preserving coordinate structural view * @return a sparsity-preserving sparse matrix in coordinate (coo) format */ template auto make_device_coo_matrix(raft::resources const& handle, - device_coordinate_structure_view structure_) + device_coordinate_structure_view structure) { - return device_sparsity_preserving_coo_matrix( - handle, - std::make_shared>(structure_)); + return device_sparsity_preserving_coo_matrix(handle, + structure); } /** @@ -212,16 +211,15 @@ auto make_device_coo_matrix(raft::resources const& handle, * @tparam ColType * @tparam NZType * @param[in] ptr a pointer to array of nonzero matrix elements on device (size nnz) - * @param[in] structure_ a sparsity-preserving coordinate structural view + * @param[in] structure a sparsity-preserving coordinate structural view * @return a sparsity-preserving sparse matrix in coordinate (coo) format */ template auto make_device_coo_matrix_view( - ElementType* ptr, device_coordinate_structure_view structure_) + ElementType* ptr, device_coordinate_structure_view structure) { return device_coo_matrix_view( - raft::device_span(ptr, structure_.get_nnz()), - std::make_shared>(structure_)); + raft::device_span(ptr, structure.get_nnz()), structure); } /** @@ -251,19 +249,17 @@ auto make_device_coo_matrix_view( * @tparam ColType * @tparam NZType * @param[in] elements a device span containing nonzero matrix elements (size nnz) - * @param[in] structure_ a sparsity-preserving coordinate structural view + * @param[in] structure a sparsity-preserving coordinate structural view * @return */ template auto make_device_coo_matrix_view( raft::device_span elements, - device_coordinate_structure_view structure_) + device_coordinate_structure_view structure) { - RAFT_EXPECTS(elements.size() == structure_.get_nnz(), + RAFT_EXPECTS(elements.size() == structure.get_nnz(), "Size of elements must be equal to the nnz from the structure"); - return device_coo_matrix_view( - elements, - std::make_shared>(structure_)); + return device_coo_matrix_view(elements, structure); } /** @@ -338,7 +334,7 @@ auto make_device_coordinate_structure(raft::resources const& handle, * @return a sparsity-preserving coordinate structural view */ template -auto make_device_coo_structure_view( +auto make_device_coordinate_structure_view( RowType* rows, ColType* cols, RowType n_rows, ColType n_cols, NZType nnz) { return device_coordinate_structure_view( @@ -376,10 +372,10 @@ auto make_device_coo_structure_view( * @return a sparsity-preserving coordinate structural view */ template -auto make_device_coo_structure_view(raft::device_span rows, - raft::device_span cols, - RowType n_rows, - ColType n_cols) +auto make_device_coordinate_structure_view(raft::device_span rows, + raft::device_span cols, + RowType n_rows, + ColType n_cols) { return device_coordinate_structure_view(rows, cols, n_rows, n_cols); } diff --git a/cpp/include/raft/core/device_csr_matrix.hpp b/cpp/include/raft/core/device_csr_matrix.hpp index 59cabacf6d..e4ec15f9bd 100644 --- a/cpp/include/raft/core/device_csr_matrix.hpp +++ b/cpp/include/raft/core/device_csr_matrix.hpp @@ -189,7 +189,7 @@ auto make_device_csr_matrix(raft::device_resources const& handle, * @tparam IndicesType * @tparam NZType * @param[in] handle raft handle for managing expensive device resources - * @param[in] structure_ a sparsity-preserving compressed structural view + * @param[in] structure a sparsity-preserving compressed structural view * @return a sparsity-preserving sparse matrix in compressed (csr) format */ template auto make_device_csr_matrix( raft::device_resources const& handle, - device_compressed_structure_view structure_) + device_compressed_structure_view structure) { return device_sparsity_preserving_csr_matrix( - handle, - std::make_shared>( - structure_)); + handle, structure); } /** @@ -232,7 +230,7 @@ auto make_device_csr_matrix( * @tparam IndicesType * @tparam NZType * @param[in] ptr a pointer to array of nonzero matrix elements on device (size nnz) - * @param[in] structure_ a sparsity-preserving compressed sparse structural view + * @param[in] structure a sparsity-preserving compressed sparse structural view * @return a sparsity-preserving csr matrix view */ template auto make_device_csr_matrix_view( - ElementType* ptr, device_compressed_structure_view structure_) + ElementType* ptr, device_compressed_structure_view structure) { return device_csr_matrix_view( - raft::device_span(ptr, structure_.get_nnz()), std::make_shared(structure_)); + raft::device_span(ptr, structure.get_nnz()), structure); } /** @@ -273,7 +271,7 @@ auto make_device_csr_matrix_view( * @tparam IndicesType * @tparam NZType * @param[in] elements device span containing array of matrix elements (size nnz) - * @param[in] structure_ a sparsity-preserving structural view + * @param[in] structure a sparsity-preserving structural view * @return a sparsity-preserving csr matrix view */ template auto make_device_csr_matrix_view( raft::device_span elements, - device_compressed_structure_view structure_) + device_compressed_structure_view structure) { - RAFT_EXPECTS(elements.size() == structure_.get_nnz(), + RAFT_EXPECTS(elements.size() == structure.get_nnz(), "Size of elements must be equal to the nnz from the structure"); - return device_csr_matrix_view( - elements, std::make_shared(structure_)); + return device_csr_matrix_view(elements, structure); } /** @@ -365,7 +362,7 @@ auto make_device_compressed_structure(raft::device_resources const& handle, * @return a sparsity-preserving compressed structural view */ template -auto make_device_csr_structure_view( +auto make_device_compressed_structure_view( IndptrType* indptr, IndicesType* indices, IndptrType n_rows, IndicesType n_cols, NZType nnz) { return device_compressed_structure_view( @@ -408,9 +405,9 @@ auto make_device_csr_structure_view( * */ template -auto make_device_csr_structure_view(raft::device_span indptr, - raft::device_span indices, - IndicesType n_cols) +auto make_device_compressed_structure_view(raft::device_span indptr, + raft::device_span indices, + IndicesType n_cols) { return device_compressed_structure_view(indptr, indices, n_cols); } diff --git a/cpp/include/raft/core/host_coo_matrix.hpp b/cpp/include/raft/core/host_coo_matrix.hpp index 45ec278a7d..8fabf5aa95 100644 --- a/cpp/include/raft/core/host_coo_matrix.hpp +++ b/cpp/include/raft/core/host_coo_matrix.hpp @@ -173,15 +173,15 @@ auto make_host_coo_matrix(raft::resources const& handle, * @tparam ColType * @tparam NZType * @param[in] handle raft handle for managing expensive resources - * @param[in] structure_ a sparsity-preserving coordinate structural view + * @param[in] structure a sparsity-preserving coordinate structural view * @return a sparsity-preserving sparse matrix in coordinate (coo) format */ template auto make_host_coo_matrix(raft::resources const& handle, - host_coordinate_structure_view structure_) + host_coordinate_structure_view structure) { - return host_sparsity_preserving_coo_matrix( - handle, std::make_shared>(structure_)); + return host_sparsity_preserving_coo_matrix(handle, + structure); } /** @@ -210,15 +210,15 @@ auto make_host_coo_matrix(raft::resources const& handle, * @tparam ColType * @tparam NZType * @param[in] ptr a pointer to array of nonzero matrix elements on host (size nnz) - * @param[in] structure_ a sparsity-preserving coordinate structural view + * @param[in] structure a sparsity-preserving coordinate structural view * @return a sparsity-preserving sparse matrix in coordinate (coo) format */ template auto make_host_coo_matrix_view(ElementType* ptr, - host_coordinate_structure_view structure_) + host_coordinate_structure_view structure) { return host_coo_matrix_view( - raft::host_span(ptr, structure_.get_nnz()), std::make_shared(structure_)); + raft::host_span(ptr, structure.get_nnz()), structure); } /** @@ -248,17 +248,16 @@ auto make_host_coo_matrix_view(ElementType* ptr, * @tparam ColType * @tparam NZType * @param[in] elements a host span containing nonzero matrix elements (size nnz) - * @param[in] structure_ a sparsity-preserving coordinate structural view + * @param[in] structure a sparsity-preserving coordinate structural view * @return */ template auto make_host_coo_matrix_view(raft::host_span elements, - host_coordinate_structure_view structure_) + host_coordinate_structure_view structure) { - RAFT_EXPECTS(elements.size() == structure_.get_nnz(), + RAFT_EXPECTS(elements.size() == structure.get_nnz(), "Size of elements must be equal to the nnz from the structure"); - return host_coo_matrix_view(elements, - std::make_shared(structure_)); + return host_coo_matrix_view(elements, structure); } /** @@ -333,7 +332,7 @@ auto make_host_coordinate_structure(raft::resources const& handle, * @return a sparsity-preserving coordinate structural view */ template -auto make_host_coo_structure_view( +auto make_host_coordinate_structure_view( RowType* rows, ColType* cols, RowType n_rows, ColType n_cols, NZType nnz) { return host_coordinate_structure_view( @@ -371,10 +370,10 @@ auto make_host_coo_structure_view( * @return a sparsity-preserving coordinate structural view */ template -auto make_host_coo_structure_view(raft::host_span rows, - raft::host_span cols, - RowType n_rows, - ColType n_cols) +auto make_host_coordinate_structure_view(raft::host_span rows, + raft::host_span cols, + RowType n_rows, + ColType n_cols) { return host_coordinate_structure_view(rows, cols, n_rows, n_cols); } diff --git a/cpp/include/raft/core/host_csr_matrix.hpp b/cpp/include/raft/core/host_csr_matrix.hpp index 437f60814e..c64bcdcea6 100644 --- a/cpp/include/raft/core/host_csr_matrix.hpp +++ b/cpp/include/raft/core/host_csr_matrix.hpp @@ -189,20 +189,18 @@ auto make_host_csr_matrix(raft::resources const& handle, * @tparam IndicesType * @tparam NZType * @param[in] handle raft handle for managing expensive resources - * @param[in] structure_ a sparsity-preserving compressed structural view + * @param[in] structure a sparsity-preserving compressed structural view * @return a sparsity-preserving sparse matrix in compressed (csr) format */ template -auto make_host_csr_matrix( - raft::resources const& handle, - host_compressed_structure_view structure_) +auto make_host_csr_matrix(raft::resources const& handle, + host_compressed_structure_view structure) { return host_sparsity_preserving_csr_matrix( - handle, - std::make_shared>(structure_)); + handle, structure); } /** @@ -231,7 +229,7 @@ auto make_host_csr_matrix( * @tparam IndicesType * @tparam NZType * @param[in] ptr a pointer to array of nonzero matrix elements on host (size nnz) - * @param[in] structure_ a sparsity-preserving compressed sparse structural view + * @param[in] structure a sparsity-preserving compressed sparse structural view * @return a sparsity-preserving csr matrix view */ template auto make_host_csr_matrix_view( - ElementType* ptr, host_compressed_structure_view structure_) + ElementType* ptr, host_compressed_structure_view structure) { return host_csr_matrix_view( - raft::host_span(ptr, structure_.get_nnz()), std::make_shared(structure_)); + raft::host_span(ptr, structure.get_nnz()), structure); } /** @@ -272,7 +270,7 @@ auto make_host_csr_matrix_view( * @tparam IndicesType * @tparam NZType * @param[in] elements host span containing array of matrix elements (size nnz) - * @param[in] structure_ a sparsity-preserving structural view + * @param[in] structure a sparsity-preserving structural view * @return a sparsity-preserving csr matrix view */ template auto make_host_csr_matrix_view( raft::host_span elements, - host_compressed_structure_view structure_) + host_compressed_structure_view structure) { - RAFT_EXPECTS(elements.size() == structure_.get_nnz(), + RAFT_EXPECTS(elements.size() == structure.get_nnz(), "Size of elements must be equal to the nnz from the structure"); - return host_csr_matrix_view( - elements, std::make_shared(structure_)); + return host_csr_matrix_view(elements, structure); } /** @@ -365,7 +362,7 @@ auto make_host_compressed_structure(raft::resources const& handle, * @return a sparsity-preserving compressed structural view */ template -auto make_host_csr_structure_view( +auto make_host_compressed_structure_view( IndptrType* indptr, IndicesType* indices, IndptrType n_rows, IndicesType n_cols, NZType nnz) { return host_compressed_structure_view( @@ -408,9 +405,9 @@ auto make_host_csr_structure_view( * */ template -auto make_host_csr_structure_view(raft::host_span indptr, - raft::host_span indices, - IndicesType n_cols) +auto make_host_compressed_structure_view(raft::host_span indptr, + raft::host_span indices, + IndicesType n_cols) { return host_compressed_structure_view(indptr, indices, n_cols); } diff --git a/cpp/include/raft/core/sparse_types.hpp b/cpp/include/raft/core/sparse_types.hpp index 207cc944d2..a14944ed5b 100644 --- a/cpp/include/raft/core/sparse_types.hpp +++ b/cpp/include/raft/core/sparse_types.hpp @@ -109,7 +109,7 @@ class sparse_matrix_view { * Return a view of the structure underlying this matrix * @return */ - structure_view_type get_structure() { return structure_view_; } + structure_view_type structure_view() { return structure_view_; } /** * Return a span of the nonzero elements of the matrix @@ -158,18 +158,19 @@ class sparse_matrix { using container_policy_type = ContainerPolicy; using container_type = typename container_policy_type::container_type; + // constructor that owns the data and the structure sparse_matrix(raft::resources const& handle, row_type n_rows, col_type n_cols, nnz_type nnz = 0) noexcept(std::is_nothrow_default_constructible_v) - : structure_{std::make_shared(handle, n_rows, n_cols, nnz)}, - cp_{}, - c_elements_{cp_.create(handle, 0)} {}; + : structure_{handle, n_rows, n_cols, nnz}, cp_{}, c_elements_{cp_.create(handle, 0)} {}; // Constructor that owns the data but not the structure - sparse_matrix(raft::resources const& handle, std::shared_ptr structure) noexcept( + // This constructor is only callable with a `structure_type == *_structure_view` + // which makes it okay to copy + sparse_matrix(raft::resources const& handle, structure_type structure) noexcept( std::is_nothrow_default_constructible_v) - : structure_{structure}, cp_{}, c_elements_{cp_.create(handle, structure.get()->get_nnz())} {}; + : structure_{structure}, cp_{}, c_elements_{cp_.create(handle, structure_.get_nnz())} {}; constexpr sparse_matrix(sparse_matrix const&) noexcept( std::is_nothrow_copy_constructible_v) = default; @@ -187,7 +188,7 @@ class sparse_matrix { raft::span get_elements() { - return raft::span(c_elements_.data(), structure_view().get_nnz()); + return raft::span(c_elements_.data(), structure_.get_nnz()); } /** @@ -209,7 +210,7 @@ class sparse_matrix { } protected: - std::shared_ptr structure_; + structure_type structure_; container_policy_type cp_; container_type c_elements_; }; From 515ee5fa43d55c0cc14ed166d955db72c7e10f36 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 19 Apr 2023 18:44:55 -0400 Subject: [PATCH 22/78] Add missing resource factory virtual destructor (#1433) Closes #1425 Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/raft/pull/1433 --- cpp/include/raft/core/resource/resource_types.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp index cf302e25f9..8e331293bf 100644 --- a/cpp/include/raft/core/resource/resource_types.hpp +++ b/cpp/include/raft/core/resource/resource_types.hpp @@ -83,6 +83,8 @@ class resource_factory { * @return resource instance */ virtual resource* make_resource() = 0; + + virtual ~resource_factory() {} }; /** From bc732c09c86ec4e25fe16e2419c6fd7123f0be22 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Thu, 20 Apr 2023 21:50:19 +0200 Subject: [PATCH 23/78] Remove usage of rapids-get-rapids-version-from-git (#1436) Instead of using `rapids-get-rapids-version-from-git` we can just hardcode the version and use `update-version.sh` to update it Authors: - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/raft/pull/1436 --- ci/build_docs.sh | 2 +- ci/release/update-version.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 5db6fa11be..e52beb22ea 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -19,7 +19,7 @@ rapids-print-env rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) -VERSION_NUMBER=$(rapids-get-rapids-version-from-git) +VERSION_NUMBER="23.06" rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index d8c22b4931..f6c6b08644 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -80,6 +80,7 @@ sed_runner "s/ucx-py.*\",/ucx-py==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\",/g" python for FILE in .github/workflows/*.yaml; do sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done +sed_runner "s/VERSION_NUMBER=\".*/VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxyfile From 0ac32e181f32302537c5056e215cd5d99635a742 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 20 Apr 2023 17:36:42 -0400 Subject: [PATCH 24/78] The glog project root CMakeLists.txt is where we should build from (#1442) Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1442 --- cpp/cmake/thirdparty/get_glog.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_glog.cmake b/cpp/cmake/thirdparty/get_glog.cmake index 9334224de5..35a9170f99 100644 --- a/cpp/cmake/thirdparty/get_glog.cmake +++ b/cpp/cmake/thirdparty/get_glog.cmake @@ -26,7 +26,6 @@ function(find_and_configure_glog) CPM_ARGS GIT_REPOSITORY https://github.com/${PKG_FORK}/glog.git GIT_TAG ${PKG_PINNED_TAG} - SOURCE_SUBDIR cpp EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL} ) @@ -46,4 +45,4 @@ find_and_configure_glog(VERSION 0.6.0 FORK google PINNED_TAG v0.6.0 EXCLUDE_FROM_ALL ON - ) \ No newline at end of file + ) From c0c4d52c5a72c494e070f42324fc80b3a7cda205 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Sun, 23 Apr 2023 16:12:39 -0700 Subject: [PATCH 25/78] fix ivf_pq n_probes (#1456) The ivf-pq search code was including a guard like ```auto n_probes = std::min(params.n_probes, index.n_lists());``` to check to make sure that we weren't selecting more values than are available. However, this wasn't being used and instead just `params.n_probes` was being passed to functions like `select_k`. This lead to asking select_k to select say 100 items, when there were only 90 to choose from - and caused some issues downstream when trying to update the select_k algorithm Fix. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Micka (https://github.com/lowener) - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/1456 --- cpp/include/raft/neighbors/detail/ivf_pq_search.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh index 4b6e6f5e31..9a94458748 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh @@ -1613,7 +1613,7 @@ inline void search(raft::device_resources const& handle, rmm::device_uvector float_queries(max_queries * dim_ext, stream, mr); rmm::device_uvector rot_queries(max_queries * index.rot_dim(), stream, mr); - rmm::device_uvector clusters_to_probe(max_queries * params.n_probes, stream, mr); + rmm::device_uvector clusters_to_probe(max_queries * n_probes, stream, mr); auto search_instance = ivfpq_search::fun(params, index.metric()); @@ -1624,7 +1624,7 @@ inline void search(raft::device_resources const& handle, clusters_to_probe.data(), float_queries.data(), queries_batch, - params.n_probes, + n_probes, index.n_lists(), dim, dim_ext, @@ -1661,10 +1661,10 @@ inline void search(raft::device_resources const& handle, search_instance(handle, index, max_samples, - params.n_probes, + n_probes, k, batch_size, - clusters_to_probe.data() + uint64_t(params.n_probes) * offset_b, + clusters_to_probe.data() + uint64_t(n_probes) * offset_b, rot_queries.data() + uint64_t(index.rot_dim()) * offset_b, neighbors + uint64_t(k) * (offset_q + offset_b), distances + uint64_t(k) * (offset_q + offset_b), From 83c326ec42fbe64ad5149a1a12a3c754a88c5c71 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 24 Apr 2023 18:55:00 -0700 Subject: [PATCH 26/78] Update clang-format to 16.0.1. (#1412) This PR updates the clang-format version used by pre-commit. Authors: - Bradley Dice (https://github.com/bdice) - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - Ray Douglass (https://github.com/raydouglass) - Corey J. Nolet (https://github.com/cjnolet) - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/raft/pull/1412 --- .pre-commit-config.yaml | 2 +- .../all_cuda-118_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +- cpp/bench/ann/src/common/dataset.h | 4 +- cpp/bench/prims/matrix/select_k.cu | 41 +++-- cpp/bench/prims/neighbors/knn.cuh | 9 +- .../raft/cluster/detail/kmeans_balanced.cuh | 2 +- .../raft/cluster/single_linkage_types.hpp | 8 +- cpp/include/raft/common/cub_wrappers.cuh | 8 +- .../raft/common/device_loads_stores.cuh | 8 +- cpp/include/raft/common/scatter.cuh | 8 +- cpp/include/raft/common/seive.hpp | 8 +- cpp/include/raft/core/csr_matrix.hpp | 2 +- cpp/include/raft/core/cublas_macros.hpp | 4 +- cpp/include/raft/core/cusolver_macros.hpp | 4 +- cpp/include/raft/core/cusparse_macros.hpp | 4 +- cpp/include/raft/core/detail/logger.hpp | 8 +- .../core/detail/mdspan_numpy_serializer.hpp | 8 +- cpp/include/raft/core/detail/nvtx.hpp | 4 +- cpp/include/raft/core/detail/span.hpp | 23 +-- cpp/include/raft/core/device_coo_matrix.hpp | 6 +- cpp/include/raft/core/device_csr_matrix.hpp | 6 +- cpp/include/raft/core/device_mdspan.hpp | 12 +- cpp/include/raft/core/device_resources.hpp | 4 +- cpp/include/raft/core/handle.hpp | 2 +- cpp/include/raft/core/host_coo_matrix.hpp | 6 +- cpp/include/raft/core/host_csr_matrix.hpp | 6 +- cpp/include/raft/core/host_mdspan.hpp | 6 +- cpp/include/raft/core/interruptible.hpp | 6 +- cpp/include/raft/core/kvp.hpp | 4 +- cpp/include/raft/core/mdarray.hpp | 19 +-- cpp/include/raft/core/mdspan.hpp | 18 +-- cpp/include/raft/core/nvtx.hpp | 8 +- .../raft/core/resource/resource_types.hpp | 2 +- cpp/include/raft/core/resources.hpp | 2 +- cpp/include/raft/core/span.hpp | 4 +- .../raft/core/temporary_device_buffer.hpp | 4 +- .../distance/detail/distance_ops/cutlass.cuh | 6 +- .../distance/detail/masked_distance_base.cuh | 2 +- .../detail/pairwise_distance_base.cuh | 2 +- .../detail/predicated_tile_iterator_normvec.h | 6 +- cpp/include/raft/lap/lap.cuh | 8 +- cpp/include/raft/lap/lap.hpp | 8 +- cpp/include/raft/linalg/add.cuh | 4 +- cpp/include/raft/linalg/binary_op.cuh | 4 +- .../raft/linalg/coalesced_reduction.cuh | 4 +- cpp/include/raft/linalg/contractions.cuh | 13 +- .../raft/linalg/detail/cublas_wrappers.hpp | 10 +- .../raft/linalg/detail/map_then_reduce.cuh | 3 +- cpp/include/raft/linalg/divide.cuh | 4 +- cpp/include/raft/linalg/eig.cuh | 4 +- cpp/include/raft/linalg/gemv.cuh | 4 +- cpp/include/raft/linalg/lanczos.cuh | 8 +- cpp/include/raft/linalg/lstsq.cuh | 4 +- cpp/include/raft/linalg/matrix_vector_op.cuh | 4 +- .../raft/linalg/mean_squared_error.cuh | 4 +- cpp/include/raft/linalg/multiply.cuh | 4 +- cpp/include/raft/linalg/power.cuh | 4 +- cpp/include/raft/linalg/reduce.cuh | 4 +- .../raft/linalg/reduce_cols_by_key.cuh | 4 +- .../raft/linalg/reduce_rows_by_key.cuh | 4 +- cpp/include/raft/linalg/rsvd.cuh | 4 +- cpp/include/raft/linalg/sqrt.cuh | 4 +- cpp/include/raft/linalg/strided_reduction.cuh | 4 +- cpp/include/raft/linalg/subtract.cuh | 4 +- cpp/include/raft/linalg/svd.cuh | 4 +- cpp/include/raft/linalg/ternary_op.cuh | 4 +- cpp/include/raft/linalg/transpose.cuh | 4 +- cpp/include/raft/linalg/unary_op.cuh | 4 +- cpp/include/raft/matrix/col_wise_sort.cuh | 2 +- .../raft/matrix/detail/select_warpsort.cuh | 5 +- cpp/include/raft/matrix/math.cuh | 6 +- cpp/include/raft/matrix/matrix.cuh | 6 +- cpp/include/raft/matrix/matrix.hpp | 8 +- cpp/include/raft/neighbors/ann_types.hpp | 10 +- cpp/include/raft/neighbors/cagra_types.hpp | 8 +- .../detail/cagra/compute_distance.hpp | 2 +- .../raft/neighbors/detail/cagra/fragment.hpp | 3 +- .../neighbors/detail/cagra/graph_core.cuh | 16 +- .../detail/cagra/search_multi_cta.cuh | 14 +- .../detail/cagra/search_multi_kernel.cuh | 36 ++--- .../neighbors/detail/cagra/search_plan.cuh | 2 +- .../detail/cagra/search_single_cta.cuh | 21 ++- .../detail/cagra/topk_for_cagra/topk_core.cuh | 14 +- .../detail/faiss_select/MergeNetworkBlock.cuh | 3 +- .../detail/faiss_select/MergeNetworkWarp.cuh | 3 +- .../neighbors/detail/faiss_select/Select.cuh | 3 +- .../raft/neighbors/detail/ivf_pq_build.cuh | 2 +- .../raft/neighbors/detail/ivf_pq_search.cuh | 2 +- cpp/include/raft/neighbors/ivf_flat_types.hpp | 8 +- cpp/include/raft/neighbors/ivf_list_types.hpp | 8 +- cpp/include/raft/neighbors/ivf_pq_types.hpp | 8 +- cpp/include/raft/random/permute.cuh | 3 +- .../random/sample_without_replacement.cuh | 3 +- .../raft/sparse/detail/cusparse_wrappers.h | 6 +- cpp/include/raft/sparse/hierarchy/common.h | 8 +- .../raft/sparse/hierarchy/single_linkage.cuh | 8 +- .../raft/sparse/linalg/detail/norm.cuh | 22 +-- .../raft/sparse/linalg/detail/spectral.cuh | 2 +- cpp/include/raft/sparse/linalg/norm.cuh | 10 +- cpp/include/raft/sparse/mst/mst.cuh | 8 +- cpp/include/raft/sparse/mst/mst.hpp | 8 +- cpp/include/raft/sparse/mst/mst_solver.cuh | 8 +- .../raft/sparse/neighbors/detail/knn.cuh | 2 +- cpp/include/raft/sparse/neighbors/knn.cuh | 6 +- .../sparse/selection/connect_components.cuh | 8 +- cpp/include/raft/sparse/selection/knn.cuh | 8 +- .../raft/sparse/selection/knn_graph.cuh | 8 +- cpp/include/raft/sparse/solver/mst_solver.cuh | 8 +- cpp/include/raft/spatial/knn/ann_common.h | 10 +- cpp/include/raft/spatial/knn/ann_types.hpp | 8 +- cpp/include/raft/spatial/knn/ball_cover.cuh | 6 +- .../raft/spatial/knn/ball_cover_types.hpp | 8 +- .../raft/spatial/knn/detail/ann_utils.cuh | 6 +- .../raft/spatial/knn/epsilon_neighborhood.cuh | 8 +- cpp/include/raft/spatial/knn/ivf_flat.cuh | 8 +- .../raft/spatial/knn/ivf_flat_types.hpp | 8 +- cpp/include/raft/spatial/knn/ivf_pq.cuh | 8 +- cpp/include/raft/spatial/knn/ivf_pq_types.hpp | 8 +- cpp/include/raft/spectral/detail/lapack.hpp | 14 +- .../raft/stats/adjusted_rand_index.cuh | 4 +- cpp/include/raft/stats/completeness_score.cuh | 4 +- cpp/include/raft/stats/cov.cuh | 4 +- cpp/include/raft/stats/detail/minmax.cuh | 5 +- cpp/include/raft/stats/entropy.cuh | 4 +- cpp/include/raft/stats/histogram.cuh | 4 +- cpp/include/raft/stats/homogeneity_score.cuh | 4 +- cpp/include/raft/stats/kl_divergence.cuh | 4 +- cpp/include/raft/stats/mean.cuh | 4 +- cpp/include/raft/stats/mean_center.cuh | 4 +- cpp/include/raft/stats/meanvar.cuh | 2 +- cpp/include/raft/stats/minmax.cuh | 4 +- cpp/include/raft/stats/mutual_info_score.cuh | 4 +- cpp/include/raft/stats/rand_index.cuh | 4 +- cpp/include/raft/stats/stddev.cuh | 4 +- cpp/include/raft/stats/sum.cuh | 4 +- cpp/include/raft/stats/v_measure.cuh | 4 +- cpp/include/raft/stats/weighted_mean.cuh | 4 +- cpp/include/raft/util/bitonic_sort.cuh | 6 +- cpp/include/raft/util/cache.cuh | 6 +- cpp/include/raft/util/cache_util.cuh | 6 +- cpp/include/raft/util/integer_utils.hpp | 6 +- cpp/include/raft/util/vectorized.cuh | 5 +- cpp/include/raft_runtime/neighbors/refine.hpp | 2 +- cpp/scripts/run-clang-format.py | 143 ------------------ cpp/test/core/mdarray.cu | 2 +- cpp/test/core/mdspan_utils.cu | 3 +- cpp/test/distance/dist_canberra.cu | 8 +- cpp/test/distance/dist_correlation.cu | 8 +- cpp/test/distance/dist_cos.cu | 5 +- cpp/test/distance/dist_hamming.cu | 8 +- cpp/test/distance/dist_hellinger.cu | 8 +- cpp/test/distance/dist_inner_product.cu | 6 +- cpp/test/distance/dist_jensen_shannon.cu | 8 +- cpp/test/distance/dist_kl_divergence.cu | 8 +- cpp/test/distance/dist_l1.cu | 8 +- cpp/test/distance/dist_l2_exp.cu | 3 +- cpp/test/distance/dist_l2_sqrt_exp.cu | 6 +- cpp/test/distance/dist_l2_unexp.cu | 3 +- cpp/test/distance/dist_l_inf.cu | 6 +- cpp/test/distance/dist_russell_rao.cu | 8 +- cpp/test/distance/distance_base.cuh | 4 +- cpp/test/linalg/rsvd.cu | 42 ++--- cpp/test/neighbors/ann_cagra.cuh | 2 +- cpp/test/neighbors/knn.cu | 4 +- cpp/test/sparse/spgemmi.cu | 28 ++-- cpp/test/util/bitonic_sort.cu | 22 +-- dependencies.yaml | 4 +- docs/source/developer_guide.md | 107 ++++++++----- thirdparty/pcg/pcg_basic.c | 93 +++++------- 170 files changed, 636 insertions(+), 853 deletions(-) delete mode 100755 cpp/scripts/run-clang-format.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d6e4ecb676..2a70632497 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: additional_dependencies: [toml] args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v11.1.0 + rev: v16.0.1 hooks: - id: clang-format types_or: [c, c++, cuda] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 0e06076f1a..d192aefa7c 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe - c-compiler -- clang-tools=11.1.0 -- clang=11.1.0 +- clang-tools=16.0.1 +- clang=16.0.1 - cmake>=3.23.1,!=3.25.0 - cuda-profiler-api=11.8.86 - cuda-python >=11.7.1,<12.0 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 5965aaef8f..2013c16fa4 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -8,8 +8,8 @@ channels: - nvidia dependencies: - c-compiler -- clang-tools=11.1.0 -- clang=11.1.0 +- clang-tools=16.0.1 +- clang=16.0.1 - cmake>=3.23.1,!=3.25.0 - cuda-profiler-api=11.8.86 - cudatoolkit=11.8 diff --git a/cpp/bench/ann/src/common/dataset.h b/cpp/bench/ann/src/common/dataset.h index 1244935c99..46dd66d649 100644 --- a/cpp/bench/ann/src/common/dataset.h +++ b/cpp/bench/ann/src/common/dataset.h @@ -47,7 +47,7 @@ class BinFile { uint32_t subset_first_row = 0, uint32_t subset_size = 0); ~BinFile() { fclose(fp_); } - BinFile(const BinFile&) = delete; + BinFile(const BinFile&) = delete; BinFile& operator=(const BinFile&) = delete; void get_shape(size_t* nrows, int* ndims) @@ -219,7 +219,7 @@ class Dataset { Dataset(const std::string& name, const std::string& distance) : name_(name), distance_(distance) { } - Dataset(const Dataset&) = delete; + Dataset(const Dataset&) = delete; Dataset& operator=(const Dataset&) = delete; virtual ~Dataset(); diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu index 870119db52..1ff584ca58 100644 --- a/cpp/bench/prims/matrix/select_k.cu +++ b/cpp/bench/prims/matrix/select_k.cu @@ -157,34 +157,33 @@ const std::vector kInputs{ {10, 1000000, 256, true, false, true}, }; -#define SELECTION_REGISTER(KeyT, IdxT, A) \ - namespace BENCHMARK_PRIVATE_NAME(selection) \ - { \ - using SelectK = selection; \ - RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \ +#define SELECTION_REGISTER(KeyT, IdxT, A) \ + namespace BENCHMARK_PRIVATE_NAME(selection) { \ + using SelectK = selection; \ + RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \ } -SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix8bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bitsExtraPass); // NOLINT SELECTION_REGISTER(double, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT } // namespace raft::matrix diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh index 8f0b1cb5d9..5431b9492e 100644 --- a/cpp/bench/prims/neighbors/knn.cuh +++ b/cpp/bench/prims/neighbors/knn.cuh @@ -384,11 +384,10 @@ inline const std::vector kNoCopyOnly{TransferStrategy::NO_COPY inline const std::vector kScopeFull{Scope::BUILD_SEARCH}; inline const std::vector kAllScopes{Scope::BUILD_SEARCH, Scope::SEARCH, Scope::BUILD}; -#define KNN_REGISTER(ValT, IdxT, ImplT, inputs, strats, scope) \ - namespace BENCHMARK_PRIVATE_NAME(knn) \ - { \ - using KNN = knn>; \ - RAFT_BENCH_REGISTER(KNN, #ValT "/" #IdxT "/" #ImplT, inputs, strats, scope); \ +#define KNN_REGISTER(ValT, IdxT, ImplT, inputs, strats, scope) \ + namespace BENCHMARK_PRIVATE_NAME(knn) { \ + using KNN = knn>; \ + RAFT_BENCH_REGISTER(KNN, #ValT "/" #IdxT "/" #ImplT, inputs, strats, scope); \ } } // namespace raft::bench::spatial diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh index 3d23c809c3..4f7cae1ad9 100644 --- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh @@ -436,7 +436,7 @@ __global__ void __launch_bounds__((WarpSize * BlockDimY)) adjust_centers_kernel(MathT* centers, // [n_clusters, dim] IdxT n_clusters, IdxT dim, - const T* dataset, // [n_rows, dim] + const T* dataset, // [n_rows, dim] IdxT n_rows, const LabelT* labels, // [n_rows] const CounterT* cluster_sizes, // [n_clusters] diff --git a/cpp/include/raft/cluster/single_linkage_types.hpp b/cpp/include/raft/cluster/single_linkage_types.hpp index 9a4fcfef60..cd815622bf 100644 --- a/cpp/include/raft/cluster/single_linkage_types.hpp +++ b/cpp/include/raft/cluster/single_linkage_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,9 +77,7 @@ class linkage_output { } }; -class linkage_output_int : public linkage_output { -}; -class linkage_output_int64 : public linkage_output { -}; +class linkage_output_int : public linkage_output {}; +class linkage_output_int64 : public linkage_output {}; }; // namespace raft::cluster diff --git a/cpp/include/raft/common/cub_wrappers.cuh b/cpp/include/raft/common/cub_wrappers.cuh index e80d7cccd9..dd8fc2d103 100644 --- a/cpp/include/raft/common/cub_wrappers.cuh +++ b/cpp/include/raft/common/cub_wrappers.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,9 +24,9 @@ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in a future release." \ - " Please note that there is no equivalent in RAFT's public API" +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please note that there is no equivalent in RAFT's public API" " so this file will eventually be removed altogether.") #include diff --git a/cpp/include/raft/common/device_loads_stores.cuh b/cpp/include/raft/common/device_loads_stores.cuh index f3cfbd81cc..6c62cd70cc 100644 --- a/cpp/include/raft/common/device_loads_stores.cuh +++ b/cpp/include/raft/common/device_loads_stores.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,8 @@ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in a future release." \ - " Please use the raft/util version instead.") +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the raft/util version instead.") #include diff --git a/cpp/include/raft/common/scatter.cuh b/cpp/include/raft/common/scatter.cuh index 0e83f9a5cd..72de79a596 100644 --- a/cpp/include/raft/common/scatter.cuh +++ b/cpp/include/raft/common/scatter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,8 @@ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in a future release." \ - " Please use the raft/matrix version instead.") +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the raft/matrix version instead.") #include diff --git a/cpp/include/raft/common/seive.hpp b/cpp/include/raft/common/seive.hpp index 633c8dd3e1..433b032b0f 100644 --- a/cpp/include/raft/common/seive.hpp +++ b/cpp/include/raft/common/seive.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,8 @@ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in a future release." \ - " Please use the raft/util version instead.") +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the raft/util version instead.") #include diff --git a/cpp/include/raft/core/csr_matrix.hpp b/cpp/include/raft/core/csr_matrix.hpp index c37cfa41c8..95d09d3eea 100644 --- a/cpp/include/raft/core/csr_matrix.hpp +++ b/cpp/include/raft/core/csr_matrix.hpp @@ -141,7 +141,7 @@ class compressed_structure constexpr auto operator=(compressed_structure const&) noexcept( std::is_nothrow_copy_assignable::value) -> compressed_structure& = default; - constexpr auto operator =(compressed_structure&&) noexcept( + constexpr auto operator=(compressed_structure&&) noexcept( std::is_nothrow_move_assignable::value) -> compressed_structure& = default; diff --git a/cpp/include/raft/core/cublas_macros.hpp b/cpp/include/raft/core/cublas_macros.hpp index 855c1228f7..5c56240ccf 100644 --- a/cpp/include/raft/core/cublas_macros.hpp +++ b/cpp/include/raft/core/cublas_macros.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ #include ///@todo: enable this once we have logger enabled -//#include +// #include #include diff --git a/cpp/include/raft/core/cusolver_macros.hpp b/cpp/include/raft/core/cusolver_macros.hpp index 8f7caf65f3..4477d32118 100644 --- a/cpp/include/raft/core/cusolver_macros.hpp +++ b/cpp/include/raft/core/cusolver_macros.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ #include #include ///@todo: enable this once logging is enabled -//#include +// #include #include #include diff --git a/cpp/include/raft/core/cusparse_macros.hpp b/cpp/include/raft/core/cusparse_macros.hpp index 8a9aab55f7..21a25ae28c 100644 --- a/cpp/include/raft/core/cusparse_macros.hpp +++ b/cpp/include/raft/core/cusparse_macros.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ #include #include ///@todo: enable this once logging is enabled -//#include +// #include #define _CUSPARSE_ERR_TO_STR(err) \ case err: return #err; diff --git a/cpp/include/raft/core/detail/logger.hpp b/cpp/include/raft/core/detail/logger.hpp index 619fb89452..532aee4d90 100644 --- a/cpp/include/raft/core/detail/logger.hpp +++ b/cpp/include/raft/core/detail/logger.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in future releases." \ - " Please use the version instead.") +#pragma message(__FILE__ \ + " is deprecated and will be removed in future releases." \ + " Please use the version instead.") #include diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index df89811636..d0aea4168e 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -74,7 +74,7 @@ namespace numpy_serializer { #if RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_LITTLE_ENDIAN_CHAR -#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 +#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_BIG_ENDIAN_CHAR #endif // RAFT_SYSTEM_LITTLE_ENDIAN == 1 @@ -110,11 +110,9 @@ struct header_t { }; template -struct is_complex : std::false_type { -}; +struct is_complex : std::false_type {}; template -struct is_complex> : std::true_type { -}; +struct is_complex> : std::true_type {}; template , bool> = true> inline dtype_t get_numpy_dtype() diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp index adbf3a3666..ca4c5e4a08 100644 --- a/cpp/include/raft/core/detail/nvtx.hpp +++ b/cpp/include/raft/core/detail/nvtx.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -191,7 +191,7 @@ inline void pop_range() nvtxDomainRangePop(domain_store::value()); } -#else // NVTX_ENABLED +#else // NVTX_ENABLED template inline void push_range(const char* format, Args... args) diff --git a/cpp/include/raft/core/detail/span.hpp b/cpp/include/raft/core/detail/span.hpp index 20500d618b..e6ccb8535c 100644 --- a/cpp/include/raft/core/detail/span.hpp +++ b/cpp/include/raft/core/detail/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,7 @@ template struct extent_value_t : public std::integral_constant< std::size_t, - Count != dynamic_extent ? Count : (Extent != dynamic_extent ? Extent - Offset : Extent)> { -}; + Count != dynamic_extent ? Count : (Extent != dynamic_extent ? Extent - Offset : Extent)> {}; /*! * If N is dynamic_extent, the extent of the returned span E is also @@ -47,31 +46,25 @@ struct extent_value_t template struct extent_as_bytes_value_t : public std::integral_constant { -}; + Extent == dynamic_extent ? Extent : sizeof(T) * Extent> {}; template struct is_allowed_extent_conversion_t : public std::integral_constant { -}; + From == To || From == dynamic_extent || To == dynamic_extent> {}; template struct is_allowed_element_type_conversion_t - : public std::integral_constant::value> { -}; + : public std::integral_constant::value> {}; template -struct is_span_oracle_t : std::false_type { -}; +struct is_span_oracle_t : std::false_type {}; template -struct is_span_oracle_t> : std::true_type { -}; +struct is_span_oracle_t> : std::true_type {}; template -struct is_span_t : public is_span_oracle_t::type> { -}; +struct is_span_t : public is_span_oracle_t::type> {}; template _RAFT_HOST_DEVICE constexpr auto lexicographical_compare(InputIt1 first1, diff --git a/cpp/include/raft/core/device_coo_matrix.hpp b/cpp/include/raft/core/device_coo_matrix.hpp index 35be67431d..ce016dd5e0 100644 --- a/cpp/include/raft/core/device_coo_matrix.hpp +++ b/cpp/include/raft/core/device_coo_matrix.hpp @@ -79,8 +79,7 @@ template using device_coordinate_structure_view = coordinate_structure_view; template -struct is_device_coo_matrix : std::false_type { -}; +struct is_device_coo_matrix : std::false_type {}; template struct is_device_coo_matrix< device_coo_matrix> - : std::true_type { -}; + : std::true_type {}; template constexpr bool is_device_coo_matrix_v = is_device_coo_matrix::value; diff --git a/cpp/include/raft/core/device_csr_matrix.hpp b/cpp/include/raft/core/device_csr_matrix.hpp index e4ec15f9bd..869034e925 100644 --- a/cpp/include/raft/core/device_csr_matrix.hpp +++ b/cpp/include/raft/core/device_csr_matrix.hpp @@ -46,8 +46,7 @@ using device_sparsity_owning_csr_matrix = csr_matrix; template -struct is_device_csr_matrix : std::false_type { -}; +struct is_device_csr_matrix : std::false_type {}; template struct is_device_csr_matrix< device_csr_matrix> - : std::true_type { -}; + : std::true_type {}; template constexpr bool is_device_csr_matrix_v = is_device_csr_matrix::value; diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp index f72ae36d64..7510c388fe 100644 --- a/cpp/include/raft/core/device_mdspan.hpp +++ b/cpp/include/raft/core/device_mdspan.hpp @@ -45,11 +45,9 @@ template >; template -struct is_device_mdspan : std::false_type { -}; +struct is_device_mdspan : std::false_type {}; template -struct is_device_mdspan : std::bool_constant { -}; +struct is_device_mdspan : std::bool_constant {}; /** * @\brief Boolean to determine if template type T is either raft::device_mdspan or a derived type @@ -64,11 +62,9 @@ template using is_output_device_mdspan_t = is_device_mdspan>; template -struct is_managed_mdspan : std::false_type { -}; +struct is_managed_mdspan : std::false_type {}; template -struct is_managed_mdspan : std::bool_constant { -}; +struct is_managed_mdspan : std::bool_constant {}; /** * @\brief Boolean to determine if template type T is either raft::managed_mdspan or a derived type diff --git a/cpp/include/raft/core/device_resources.hpp b/cpp/include/raft/core/device_resources.hpp index df6b39a368..1cab36561a 100644 --- a/cpp/include/raft/core/device_resources.hpp +++ b/cpp/include/raft/core/device_resources.hpp @@ -69,7 +69,7 @@ class device_resources : public resources { } device_resources(const device_resources& handle) : resources{handle} {} - device_resources(device_resources&&) = delete; + device_resources(device_resources&&) = delete; device_resources& operator=(device_resources&&) = delete; /** @@ -246,7 +246,7 @@ class stream_syncer { handle_.sync_stream_pool(); } - stream_syncer(const stream_syncer& other) = delete; + stream_syncer(const stream_syncer& other) = delete; stream_syncer& operator=(const stream_syncer& other) = delete; private: diff --git a/cpp/include/raft/core/handle.hpp b/cpp/include/raft/core/handle.hpp index 02efebec9e..2a6b5657e2 100644 --- a/cpp/include/raft/core/handle.hpp +++ b/cpp/include/raft/core/handle.hpp @@ -39,7 +39,7 @@ class handle_t : public raft::device_resources { handle_t(const handle_t& handle) : device_resources{handle} {} - handle_t(handle_t&&) = delete; + handle_t(handle_t&&) = delete; handle_t& operator=(handle_t&&) = delete; /** diff --git a/cpp/include/raft/core/host_coo_matrix.hpp b/cpp/include/raft/core/host_coo_matrix.hpp index 8fabf5aa95..32e7a9e3c4 100644 --- a/cpp/include/raft/core/host_coo_matrix.hpp +++ b/cpp/include/raft/core/host_coo_matrix.hpp @@ -78,8 +78,7 @@ template using host_coordinate_structure_view = coordinate_structure_view; template -struct is_host_coo_matrix : std::false_type { -}; +struct is_host_coo_matrix : std::false_type {}; template struct is_host_coo_matrix< host_coo_matrix> - : std::true_type { -}; + : std::true_type {}; template constexpr bool is_host_coo_matrix_v = is_host_coo_matrix::value; diff --git a/cpp/include/raft/core/host_csr_matrix.hpp b/cpp/include/raft/core/host_csr_matrix.hpp index c64bcdcea6..86199335f2 100644 --- a/cpp/include/raft/core/host_csr_matrix.hpp +++ b/cpp/include/raft/core/host_csr_matrix.hpp @@ -45,8 +45,7 @@ using host_sparsity_owning_csr_matrix = csr_matrix; template -struct is_host_csr_matrix : std::false_type { -}; +struct is_host_csr_matrix : std::false_type {}; template struct is_host_csr_matrix< host_csr_matrix> - : std::true_type { -}; + : std::true_type {}; template constexpr bool is_host_csr_matrix_v = is_host_csr_matrix::value; diff --git a/cpp/include/raft/core/host_mdspan.hpp b/cpp/include/raft/core/host_mdspan.hpp index a6cdec7a84..9a675680ac 100644 --- a/cpp/include/raft/core/host_mdspan.hpp +++ b/cpp/include/raft/core/host_mdspan.hpp @@ -37,11 +37,9 @@ template >; template -struct is_host_mdspan : std::false_type { -}; +struct is_host_mdspan : std::false_type {}; template -struct is_host_mdspan : std::bool_constant { -}; +struct is_host_mdspan : std::bool_constant {}; /** * @\brief Boolean to determine if template type T is either raft::host_mdspan or a derived type diff --git a/cpp/include/raft/core/interruptible.hpp b/cpp/include/raft/core/interruptible.hpp index 0cc4af2bbf..62e481a801 100644 --- a/cpp/include/raft/core/interruptible.hpp +++ b/cpp/include/raft/core/interruptible.hpp @@ -172,10 +172,10 @@ class interruptible { inline void cancel() noexcept { continue_.clear(std::memory_order_relaxed); } // don't allow the token to leave the shared_ptr - interruptible(interruptible const&) = delete; - interruptible(interruptible&&) = delete; + interruptible(interruptible const&) = delete; + interruptible(interruptible&&) = delete; auto operator=(interruptible const&) -> interruptible& = delete; - auto operator=(interruptible&&) -> interruptible& = delete; + auto operator=(interruptible&&) -> interruptible& = delete; private: /** Global registry of thread-local cancellation stores. */ diff --git a/cpp/include/raft/core/kvp.hpp b/cpp/include/raft/core/kvp.hpp index 192d160d45..2e0d1117a1 100644 --- a/cpp/include/raft/core/kvp.hpp +++ b/cpp/include/raft/core/kvp.hpp @@ -32,8 +32,8 @@ struct KeyValuePair { typedef _Key Key; ///< Key data type typedef _Value Value; ///< Value data type - Key key; ///< Item key - Value value; ///< Item value + Key key; ///< Item key + Value value; ///< Item value /// Constructor RAFT_INLINE_FUNCTION KeyValuePair() {} diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp index 88f90485dd..e1209835c9 100644 --- a/cpp/include/raft/core/mdarray.hpp +++ b/cpp/include/raft/core/mdarray.hpp @@ -55,12 +55,10 @@ class array_interface { namespace detail { template -struct is_array_interface : std::false_type { -}; +struct is_array_interface : std::false_type {}; template struct is_array_interface().view())>> - : std::bool_constant().view())>> { -}; + : std::bool_constant().view())>> {}; template using is_array_interface_t = is_array_interface>; @@ -75,16 +73,13 @@ inline constexpr bool is_array_interface_v = is_array_interface -struct is_array_interface : std::true_type { -}; +struct is_array_interface : std::true_type {}; template -struct is_array_interface : detail::is_array_interface_t { -}; +struct is_array_interface : detail::is_array_interface_t {}; template struct is_array_interface : std::conditional_t, is_array_interface, - std::false_type> { -}; + std::false_type> {}; /** * @\brief Boolean to determine if variadic template types Tn are raft::array_interface * or derived type or any type that has a member function `view()` that returns either @@ -177,9 +172,9 @@ class mdarray constexpr mdarray(mdarray&&) noexcept(std::is_nothrow_move_constructible::value) = default; - constexpr auto operator =(mdarray const&) noexcept( + constexpr auto operator=(mdarray const&) noexcept( std::is_nothrow_copy_assignable::value) -> mdarray& = default; - constexpr auto operator =(mdarray&&) noexcept( + constexpr auto operator=(mdarray&&) noexcept( std::is_nothrow_move_assignable::value) -> mdarray& = default; ~mdarray() noexcept(std::is_nothrow_destructible::value) = default; diff --git a/cpp/include/raft/core/mdspan.hpp b/cpp/include/raft/core/mdspan.hpp index 1c69cdd973..cd9ca26ed9 100644 --- a/cpp/include/raft/core/mdspan.hpp +++ b/cpp/include/raft/core/mdspan.hpp @@ -85,28 +85,22 @@ template *); template -struct is_mdspan : std::false_type { -}; +struct is_mdspan : std::false_type {}; template struct is_mdspan()))>> - : std::true_type { -}; + : std::true_type {}; template -struct is_input_mdspan : std::false_type { -}; +struct is_input_mdspan : std::false_type {}; template struct is_input_mdspan()))>> - : std::bool_constant> { -}; + : std::bool_constant> {}; template -struct is_output_mdspan : std::false_type { -}; +struct is_output_mdspan : std::false_type {}; template struct is_output_mdspan()))>> - : std::bool_constant> { -}; + : std::bool_constant> {}; template using is_mdspan_t = is_mdspan>; diff --git a/cpp/include/raft/core/nvtx.hpp b/cpp/include/raft/core/nvtx.hpp index 09a41f10a6..57338c32c7 100644 --- a/cpp/include/raft/core/nvtx.hpp +++ b/cpp/include/raft/core/nvtx.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -144,9 +144,9 @@ class range { ~range() { pop_range(); } /* This object is not meant to be touched. */ - range(const range&) = delete; - range(range&&) = delete; - auto operator=(const range&) -> range& = delete; + range(const range&) = delete; + range(range&&) = delete; + auto operator=(const range&) -> range& = delete; auto operator=(range&&) -> range& = delete; static auto operator new(std::size_t) -> void* = delete; static auto operator new[](std::size_t) -> void* = delete; diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp index 8e331293bf..2dc4eb1f9d 100644 --- a/cpp/include/raft/core/resource/resource_types.hpp +++ b/cpp/include/raft/core/resource/resource_types.hpp @@ -42,7 +42,7 @@ enum resource_type { THRUST_POLICY, // thrust execution policy WORKSPACE_RESOURCE, // rmm device memory resource - LAST_KEY // reserved for the last key + LAST_KEY // reserved for the last key }; /** diff --git a/cpp/include/raft/core/resources.hpp b/cpp/include/raft/core/resources.hpp index 64e281e934..4de7d43e76 100644 --- a/cpp/include/raft/core/resources.hpp +++ b/cpp/include/raft/core/resources.hpp @@ -67,7 +67,7 @@ class resources { * Note that this does not create any new resources. */ resources(const resources& res) : factories_(res.factories_), resources_(res.resources_) {} - resources(resources&&) = delete; + resources(resources&&) = delete; resources& operator=(resources&&) = delete; /** diff --git a/cpp/include/raft/core/span.hpp b/cpp/include/raft/core/span.hpp index 188d58c896..a896ba1977 100644 --- a/cpp/include/raft/core/span.hpp +++ b/cpp/include/raft/core/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,7 +104,7 @@ class span { constexpr span(span&& other) noexcept = default; constexpr auto operator=(span const& other) noexcept -> span& = default; - constexpr auto operator=(span&& other) noexcept -> span& = default; + constexpr auto operator=(span&& other) noexcept -> span& = default; constexpr auto begin() const noexcept -> iterator { return data(); } diff --git a/cpp/include/raft/core/temporary_device_buffer.hpp b/cpp/include/raft/core/temporary_device_buffer.hpp index 194471c5de..4baa7e9597 100644 --- a/cpp/include/raft/core/temporary_device_buffer.hpp +++ b/cpp/include/raft/core/temporary_device_buffer.hpp @@ -55,10 +55,10 @@ class temporary_device_buffer { static constexpr bool is_const_pointer_ = std::is_const_v; public: - temporary_device_buffer(temporary_device_buffer const&) = delete; + temporary_device_buffer(temporary_device_buffer const&) = delete; temporary_device_buffer& operator=(temporary_device_buffer const&) = delete; - constexpr temporary_device_buffer(temporary_device_buffer&&) = default; + constexpr temporary_device_buffer(temporary_device_buffer&&) = default; constexpr temporary_device_buffer& operator=(temporary_device_buffer&&) = default; /** diff --git a/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh b/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh index 7a4fe0ce83..68e843c6f5 100644 --- a/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh +++ b/cpp/include/raft/distance/detail/distance_ops/cutlass.cuh @@ -30,13 +30,11 @@ namespace raft::distance::detail::ops { // This pattern is described in: // https://en.cppreference.com/w/cpp/types/void_t template -struct has_cutlass_op : std::false_type { -}; +struct has_cutlass_op : std::false_type {}; // Specialization recognizes types that do support CUTLASS template struct has_cutlass_op().get_cutlass_op())>> - : std::true_type { -}; + : std::true_type {}; } // namespace raft::distance::detail::ops diff --git a/cpp/include/raft/distance/detail/masked_distance_base.cuh b/cpp/include/raft/distance/detail/masked_distance_base.cuh index 55da634145..5a33c9ce4a 100644 --- a/cpp/include/raft/distance/detail/masked_distance_base.cuh +++ b/cpp/include/raft/distance/detail/masked_distance_base.cuh @@ -217,7 +217,7 @@ struct MaskedDistances : public BaseClass { } // tile_idx_n } // idx_g rowEpilog_op(tile_idx_m); - } // tile_idx_m + } // tile_idx_m } private: diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh index c6b09be31e..58b5daa8ca 100644 --- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh +++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh @@ -18,7 +18,7 @@ #include // ceildiv #include // RAFT_CUDA_TRY -#include // size_t +#include // size_t namespace raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h index 67c01448dc..ebe6d0c80a 100644 --- a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h +++ b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,8 +57,8 @@ namespace threadblock { /// /// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator /// -template diff --git a/cpp/include/raft/lap/lap.cuh b/cpp/include/raft/lap/lap.cuh index ca7d5e96a9..f7828294cd 100644 --- a/cpp/include/raft/lap/lap.cuh +++ b/cpp/include/raft/lap/lap.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,9 +24,9 @@ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in a future release." \ - " Please use the raft/solver version instead.") +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the raft/solver version instead.") #include diff --git a/cpp/include/raft/lap/lap.hpp b/cpp/include/raft/lap/lap.hpp index 30f2b53e52..5472422053 100644 --- a/cpp/include/raft/lap/lap.hpp +++ b/cpp/include/raft/lap/lap.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,8 @@ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in a future release." \ - " Please use the cuh version instead.") +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") #include diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh index 608c63e1a9..c19f491319 100644 --- a/cpp/include/raft/linalg/add.cuh +++ b/cpp/include/raft/linalg/add.cuh @@ -216,7 +216,7 @@ void add_scalar(raft::device_resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh index ed083a1590..88c49d1f42 100644 --- a/cpp/include/raft/linalg/binary_op.cuh +++ b/cpp/include/raft/linalg/binary_op.cuh @@ -82,7 +82,7 @@ void binary_op(raft::device_resources const& handle, InType in1, InType in2, Out /** @} */ // end of group binary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh index 674be207d8..48c121c359 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/coalesced_reduction.cuh @@ -159,7 +159,7 @@ void coalesced_reduction(raft::device_resources const& handle, /** @} */ // end of group coalesced_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh index 4321e13d95..3b1e8c41c4 100644 --- a/cpp/include/raft/linalg/contractions.cuh +++ b/cpp/include/raft/linalg/contractions.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -100,7 +100,7 @@ struct KernelPolicy { SmemSize = 2 * SmemPage * sizeof(DataT), }; // enum -}; // struct KernelPolicy +}; // struct KernelPolicy template struct ColKernelPolicy { @@ -151,8 +151,7 @@ struct ColKernelPolicy { * @{ */ template -struct Policy4x4 { -}; +struct Policy4x4 {}; template struct Policy4x4 { @@ -174,8 +173,7 @@ struct Policy4x4 { * */ template -struct Policy4x4Skinny { -}; +struct Policy4x4Skinny {}; template struct Policy4x4Skinny { @@ -194,8 +192,7 @@ struct Policy4x4Skinny { * @{ */ template -struct Policy2x8 { -}; +struct Policy2x8 {}; template struct Policy2x8 { diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp index 87a195757c..5a7356a4c2 100644 --- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp +++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp @@ -41,9 +41,9 @@ class cublas_device_pointer_mode { } } auto operator=(const cublas_device_pointer_mode&) -> cublas_device_pointer_mode& = delete; - auto operator=(cublas_device_pointer_mode&&) -> cublas_device_pointer_mode& = delete; - static auto operator new(std::size_t) -> void* = delete; - static auto operator new[](std::size_t) -> void* = delete; + auto operator=(cublas_device_pointer_mode&&) -> cublas_device_pointer_mode& = delete; + static auto operator new(std::size_t) -> void* = delete; + static auto operator new[](std::size_t) -> void* = delete; ~cublas_device_pointer_mode() { @@ -550,7 +550,7 @@ cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - float* const A[], // NOLINT + float* const A[], // NOLINT int lda, int* P, int* info, @@ -564,7 +564,7 @@ inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - double* const A[], // NOLINT + double* const A[], // NOLINT int lda, int* P, int* info, diff --git a/cpp/include/raft/linalg/detail/map_then_reduce.cuh b/cpp/include/raft/linalg/detail/map_then_reduce.cuh index 70bb2df4f5..c22ef09809 100644 --- a/cpp/include/raft/linalg/detail/map_then_reduce.cuh +++ b/cpp/include/raft/linalg/detail/map_then_reduce.cuh @@ -25,8 +25,7 @@ namespace raft { namespace linalg { namespace detail { -struct sum_tag { -}; +struct sum_tag {}; template __device__ void reduce(OutType* out, const InType acc, sum_tag) diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh index 0b18e6175c..428b9ba618 100644 --- a/cpp/include/raft/linalg/divide.cuh +++ b/cpp/include/raft/linalg/divide.cuh @@ -95,7 +95,7 @@ void divide_scalar(raft::device_resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 03e94a10b1..7829f8e49f 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -219,7 +219,7 @@ void eig_jacobi(raft::device_resources const& handle, /** @} */ // end of eig -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh index 96846003f6..019ec9f7ac 100644 --- a/cpp/include/raft/linalg/gemv.cuh +++ b/cpp/include/raft/linalg/gemv.cuh @@ -304,6 +304,6 @@ void gemv(raft::device_resources const& handle, } /** @} */ // end of gemv -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lanczos.cuh b/cpp/include/raft/linalg/lanczos.cuh index c9f3e0010e..04e9980583 100644 --- a/cpp/include/raft/linalg/lanczos.cuh +++ b/cpp/include/raft/linalg/lanczos.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,9 +24,9 @@ #pragma once -#pragma message(__FILE__ \ - " is deprecated and will be removed in a future release." \ - " Please use the sparse solvers version instead.") +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the sparse solvers version instead.") #include diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh index b36a9eba96..c753215737 100644 --- a/cpp/include/raft/linalg/lstsq.cuh +++ b/cpp/include/raft/linalg/lstsq.cuh @@ -244,7 +244,7 @@ void lstsq_qr(raft::device_resources const& handle, /** @} */ // end of lstsq -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh index 59b2ca5ee5..6c65626ac5 100644 --- a/cpp/include/raft/linalg/matrix_vector_op.cuh +++ b/cpp/include/raft/linalg/matrix_vector_op.cuh @@ -238,7 +238,7 @@ void matrix_vector_op(raft::device_resources const& handle, /** @} */ // end of group matrix_vector_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh index 62f4896d01..317c085673 100644 --- a/cpp/include/raft/linalg/mean_squared_error.cuh +++ b/cpp/include/raft/linalg/mean_squared_error.cuh @@ -74,7 +74,7 @@ void mean_squared_error(raft::device_resources const& handle, /** @} */ // end of group mean_squared_error -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh index 574b88c63d..bdca641616 100644 --- a/cpp/include/raft/linalg/multiply.cuh +++ b/cpp/include/raft/linalg/multiply.cuh @@ -97,7 +97,7 @@ void multiply_scalar( /** @} */ // end of group multiply -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh index 1fdfcb3780..057d6f6827 100644 --- a/cpp/include/raft/linalg/power.cuh +++ b/cpp/include/raft/linalg/power.cuh @@ -153,7 +153,7 @@ void power_scalar( /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh index ae5457c44f..06f62f207e 100644 --- a/cpp/include/raft/linalg/reduce.cuh +++ b/cpp/include/raft/linalg/reduce.cuh @@ -161,7 +161,7 @@ void reduce(raft::device_resources const& handle, /** @} */ // end of group reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh index 2b744d8134..71c8cf14a1 100644 --- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh @@ -112,7 +112,7 @@ void reduce_cols_by_key( /** @} */ // end of group reduce_cols_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh index 484b60238b..0e83c9aa2b 100644 --- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh @@ -191,7 +191,7 @@ void reduce_rows_by_key( /** @} */ // end of group reduce_rows_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh index eb94547f13..8a32467873 100644 --- a/cpp/include/raft/linalg/rsvd.cuh +++ b/cpp/include/raft/linalg/rsvd.cuh @@ -765,7 +765,7 @@ void rsvd_perc_symmetric_jacobi(Args... args) /** @} */ // end of group rsvd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh index 55e661897d..eecc719617 100644 --- a/cpp/include/raft/linalg/sqrt.cuh +++ b/cpp/include/raft/linalg/sqrt.cuh @@ -83,7 +83,7 @@ void sqrt(raft::device_resources const& handle, InType in, OutType out) /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh index f58dfe28b3..25be368865 100644 --- a/cpp/include/raft/linalg/strided_reduction.cuh +++ b/cpp/include/raft/linalg/strided_reduction.cuh @@ -170,7 +170,7 @@ void strided_reduction(raft::device_resources const& handle, /** @} */ // end of group strided_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh index da995b7a2a..cbd6b9df59 100644 --- a/cpp/include/raft/linalg/subtract.cuh +++ b/cpp/include/raft/linalg/subtract.cuh @@ -222,7 +222,7 @@ void subtract_scalar( /** @} */ // end of group subtract -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index 4b78f2ef61..801d271fe9 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -415,7 +415,7 @@ void svd_reconstruction(raft::device_resources const& handle, /** @} */ // end of group svd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh index 1e347d69be..ce95e98499 100644 --- a/cpp/include/raft/linalg/ternary_op.cuh +++ b/cpp/include/raft/linalg/ternary_op.cuh @@ -83,7 +83,7 @@ void ternary_op( /** @} */ // end of group ternary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index a0f418b4f7..2f31cfd722 100644 --- a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -102,7 +102,7 @@ auto transpose(raft::device_resources const& handle, /** @} */ // end of group transpose -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh index 23f932d2f2..58ff2f6bd6 100644 --- a/cpp/include/raft/linalg/unary_op.cuh +++ b/cpp/include/raft/linalg/unary_op.cuh @@ -124,7 +124,7 @@ void write_only_unary_op(const raft::device_resources& handle, OutType out, Lamb /** @} */ // end of group unary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh index a4daf097e5..6546a48279 100644 --- a/cpp/include/raft/matrix/col_wise_sort.cuh +++ b/cpp/include/raft/matrix/col_wise_sort.cuh @@ -133,6 +133,6 @@ void sort_cols_per_row(Args... args) /** @} */ // end of group col_wise_sort -}; // end namespace raft::matrix +}; // end namespace raft::matrix #endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh index d362b73792..93d405da48 100644 --- a/cpp/include/raft/matrix/detail/select_warpsort.cuh +++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh @@ -870,8 +870,7 @@ struct launch_setup { }; template