From cbc28a2f0575dff6a248f35e1a843d6883bd01a6 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Fri, 22 Jul 2022 10:54:12 -0400 Subject: [PATCH 01/58] DOC --- CHANGELOG.md | 4 ++++ ci/cpu/build.sh | 2 +- ci/gpu/build.sh | 2 +- conda/environments/raft_dev_cuda11.0.yml | 12 ++++++------ conda/environments/raft_dev_cuda11.2.yml | 12 ++++++------ conda/environments/raft_dev_cuda11.4.yml | 12 ++++++------ conda/environments/raft_dev_cuda11.5.yml | 12 ++++++------ cpp/CMakeLists.txt | 4 ++-- docs/source/conf.py | 4 ++-- python/pylibraft/CMakeLists.txt | 4 ++-- python/raft/CMakeLists.txt | 2 +- 11 files changed, 37 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36ada7467e..4e08e79189 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# raft 22.10.00 (Date TBD) + +Please see https://github.com/rapidsai/raft/releases/tag/v22.10.00a for the latest changes to this development branch. + # raft 22.08.00 (Date TBD) Please see https://github.com/rapidsai/raft/releases/tag/v22.08.00a for the latest changes to this development branch. 
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 8f4fc74b40..75d61e6b5f 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -36,7 +36,7 @@ export CMAKE_GENERATOR="Ninja" export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld" # ucx-py version -export UCX_PY_VERSION='0.27.*' +export UCX_PY_VERSION='0.28.*' ################################################################################ # SETUP - Check environment diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 4689b8fcc5..1c7b72bec7 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -31,7 +31,7 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` unset GIT_DESCRIBE_TAG # ucx-py version -export UCX_PY_VERSION='0.27.*' +export UCX_PY_VERSION='0.28.*' ################################################################################ # SETUP - Check environment diff --git a/conda/environments/raft_dev_cuda11.0.yml b/conda/environments/raft_dev_cuda11.0.yml index 4b019e6892..004cb3c626 100644 --- a/conda/environments/raft_dev_cuda11.0.yml +++ b/conda/environments/raft_dev_cuda11.0.yml @@ -11,13 +11,13 @@ dependencies: - cython>=0.29,<0.30 - cmake>=3.20.1,!=3.23.0 - scikit-build>=0.13.1 -- rapids-build-env=22.08.* -- rapids-notebook-env=22.08.* -- rapids-doc-env=22.08.* -- rmm=22.08.* -- dask-cuda=22.08.* +- rapids-build-env=22.10.* +- rapids-notebook-env=22.10.* +- rapids-doc-env=22.10.* +- rmm=22.10.* +- dask-cuda=22.10.* - ucx>=1.13.0 -- ucx-py=0.27.* +- ucx-py=0.28.* - ucx-proc=*=gpu - doxygen>=1.8.20 - libfaiss>=1.7.0 diff --git a/conda/environments/raft_dev_cuda11.2.yml b/conda/environments/raft_dev_cuda11.2.yml index 0dcf9cd37f..b43df6e2be 100644 --- a/conda/environments/raft_dev_cuda11.2.yml +++ b/conda/environments/raft_dev_cuda11.2.yml @@ -11,13 +11,13 @@ dependencies: - cython>=0.29,<0.30 - cmake>=3.20.1,!=3.23.0 - scikit-build>=0.13.1 -- rapids-build-env=22.08.* -- rapids-notebook-env=22.08.* -- rapids-doc-env=22.08.* -- rmm=22.08.* -- dask-cuda=22.08.* +- 
rapids-build-env=22.10.* +- rapids-notebook-env=22.10.* +- rapids-doc-env=22.10.* +- rmm=22.10.* +- dask-cuda=22.10.* - ucx>=1.13.0 -- ucx-py=0.27.* +- ucx-py=0.28.* - ucx-proc=*=gpu - doxygen>=1.8.20 - libfaiss>=1.7.0 diff --git a/conda/environments/raft_dev_cuda11.4.yml b/conda/environments/raft_dev_cuda11.4.yml index f97301c448..fe76711b85 100644 --- a/conda/environments/raft_dev_cuda11.4.yml +++ b/conda/environments/raft_dev_cuda11.4.yml @@ -11,13 +11,13 @@ dependencies: - cython>=0.29,<0.30 - cmake>=3.20.1,!=3.23.0 - scikit-build>=0.13.1 -- rapids-build-env=22.08.* -- rapids-notebook-env=22.08.* -- rapids-doc-env=22.08.* -- rmm=22.08.* -- dask-cuda=22.08.* +- rapids-build-env=22.10.* +- rapids-notebook-env=22.10.* +- rapids-doc-env=22.10.* +- rmm=22.10.* +- dask-cuda=22.10.* - ucx>=1.13.0 -- ucx-py=0.27.* +- ucx-py=0.28.* - ucx-proc=*=gpu - doxygen>=1.8.20 - libfaiss>=1.7.0 diff --git a/conda/environments/raft_dev_cuda11.5.yml b/conda/environments/raft_dev_cuda11.5.yml index 43b4fa0bcd..f943de22cd 100644 --- a/conda/environments/raft_dev_cuda11.5.yml +++ b/conda/environments/raft_dev_cuda11.5.yml @@ -12,13 +12,13 @@ dependencies: - cython>=0.29,<0.30 - cmake>=3.20.1,!=3.23.0 - scikit-build>=0.13.1 -- rapids-build-env=22.08.* -- rapids-notebook-env=22.08.* -- rapids-doc-env=22.08.* -- rmm=22.08.* -- dask-cuda=22.08.* +- rapids-build-env=22.10.* +- rapids-notebook-env=22.10.* +- rapids-doc-env=22.10.* +- rmm=22.10.* +- dask-cuda=22.10.* - ucx>=1.13.0 -- ucx-py=0.27.* +- ucx-py=0.28.* - ucx-proc=*=gpu - doxygen>=1.8.20 - libfaiss>=1.7.0 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 596cf8547d..f6d955633a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -17,7 +17,7 @@ set(RAPIDS_VERSION "22.06") set(RAFT_VERSION "${RAPIDS_VERSION}.00") cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.08/RAPIDS.cmake +file(DOWNLOAD 
https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) include(rapids-cmake) @@ -28,7 +28,7 @@ include(rapids-find) rapids_cuda_init_architectures(RAFT) -project(RAFT VERSION 22.08.00 LANGUAGES CXX CUDA) +project(RAFT VERSION 22.10.00 LANGUAGES CXX CUDA) # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. Setting this flag ensures diff --git a/docs/source/conf.py b/docs/source/conf.py index 9d3764853d..5a3cbba36a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -84,9 +84,9 @@ # built documents. # # The short X.Y version. -version = '22.08' +version = '22.10' # The full version, including alpha/beta/rc tags. -release = '22.08.00' +release = '22.10.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index be615fc091..7586489911 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -14,9 +14,9 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -set(pylibraft_version 22.08.00) +set(pylibraft_version 22.10.00) -file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.08/RAPIDS.cmake +file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) diff --git a/python/raft/CMakeLists.txt b/python/raft/CMakeLists.txt index 60f0aea89f..8f9b703eee 100644 --- a/python/raft/CMakeLists.txt +++ b/python/raft/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -set(pyraft_version 22.08.00) +set(pyraft_version 22.10.00) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.06/RAPIDS.cmake 
${CMAKE_BINARY_DIR}/RAPIDS.cmake) From 8b4473412dca90f7377e25115eb31ca687d47070 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Mon, 1 Aug 2022 11:27:29 -0400 Subject: [PATCH 02/58] fix versions --- ci/release/update-version.sh | 3 ++- cpp/CMakeLists.txt | 6 +++--- fetch_rapids.cmake | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 175a93bf5b..79a9bff3be 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -31,7 +31,8 @@ function sed_runner() { sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak } -sed_runner 's/'"RAFT VERSION .* LANGUAGES"'/'"RAFT VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt +sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/CMakeLists.txt +sed_runner "s/set(RAFT_VERSION .*)/set(RAFT_VERSION \"${NEXT_FULL_TAG}\")/g" cpp/CMakeLists.txt sed_runner 's/'"pylibraft_version .*)"'/'"pylibraft_version ${NEXT_FULL_TAG})"'/g' python/pylibraft/CMakeLists.txt sed_runner 's/'"pyraft_version .*)"'/'"pyraft_version ${NEXT_FULL_TAG})"'/g' python/raft/CMakeLists.txt sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f15ab200c2..4fef0b2885 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
#============================================================================= -set(RAPIDS_VERSION "22.06") -set(RAFT_VERSION "${RAPIDS_VERSION}.00") +set(RAPIDS_VERSION "22.10") +set(RAFT_VERSION "22.10.00") cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) include(../fetch_rapids.cmake) @@ -26,7 +26,7 @@ include(rapids-find) rapids_cuda_init_architectures(RAFT) -project(RAFT VERSION 22.10.00 LANGUAGES CXX CUDA) +project(RAFT VERSION ${RAFT_VERSION} LANGUAGES CXX CUDA) # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. Setting this flag ensures diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 2b5c7e9d35..ba00380077 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -11,7 +11,7 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.08/RAPIDS.cmake +file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake ) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) From b0fdebb0b324732b9f0a309f5defcadc3103fee4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 2 Aug 2022 13:38:03 -0700 Subject: [PATCH 03/58] Various fixes for build.sh (#771) This PR includes the following fixes for build.sh: - The parallel argument is for the build tool (make/ninja) and not CMake or setup.py, so it needs to be passed after a second `--`. - pylibraft requires the distances component of libraft to be compiled. 
If pylibraft is passed to build.sh, we need to force building libraft with the distance component - build.sh doesn't install by default, so the `CMAKE_PREFIX_PATH` that the Python build points to needs to also include the build directory in addition to the install directory. I think giving the build directory precedence makes more sense, but that could be changed if necessary. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Thejaswi. N. S (https://github.com/teju85) URL: https://github.com/rapidsai/raft/pull/771 --- build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index 8b00fa69dd..9eebdafe5c 100755 --- a/build.sh +++ b/build.sh @@ -182,7 +182,7 @@ if hasArg --compile-nn || hasArg --compile-libs || (( ${NUMARGS} == 0 )); then CMAKE_TARGET="${CMAKE_TARGET};raft_nn_lib" fi -if hasArg --compile-dist || hasArg --compile-libs || (( ${NUMARGS} == 0 )); then +if hasArg --compile-dist || hasArg --compile-libs || hasArg pylibraft || (( ${NUMARGS} == 0 )); then COMPILE_DIST_LIBRARY=ON CMAKE_TARGET="${CMAKE_TARGET};raft_distance_lib" fi @@ -250,7 +250,7 @@ fi ################################################################################ # Configure for building all C++ targets -if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || hasArg bench; then +if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg pylibraft || hasArg docs || hasArg tests || hasArg bench; then if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then RAFT_CMAKE_CUDA_ARCHITECTURES="NATIVE" echo "Building for the architecture of the GPU in the system..." 
@@ -292,7 +292,7 @@ fi if (( ${NUMARGS} == 0 )) || hasArg pyraft || hasArg docs; then cd ${REPODIR}/python/raft - python setup.py build_ext -j${PARALLEL_LEVEL:-1} --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} + python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${LIBRAFT_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} if [[ ${INSTALL_TARGET} != "" ]]; then python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${EXTRA_CMAKE_ARGS} fi @@ -302,7 +302,7 @@ fi if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then cd ${REPODIR}/python/pylibraft - python setup.py build_ext -j${PARALLEL_LEVEL:-1} --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} + python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${LIBRAFT_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} if [[ ${INSTALL_TARGET} != "" ]]; then python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${EXTRA_CMAKE_ARGS} fi From 5b0461f9d1bcbb829722c33fa687d9e881d31ead Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 9 Aug 2022 09:04:41 -0500 Subject: [PATCH 04/58] Unpin `dask` and `distributed` for development (#783) This PR unpins `dask` & `distributed` for `22.10` development. xref: https://github.com/rapidsai/cudf/pull/11492 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ray Douglass (https://github.com/raydouglass) - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/783 --- ci/gpu/build.sh | 4 ++-- conda/environments/raft_dev_cuda11.0.yml | 4 ++-- conda/environments/raft_dev_cuda11.2.yml | 4 ++-- conda/environments/raft_dev_cuda11.4.yml | 4 ++-- conda/environments/raft_dev_cuda11.5.yml | 4 ++-- conda/recipes/pyraft/meta.yaml | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 0fd85e935c..1c7b72bec7 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -79,8 +79,8 @@ fi # Install the master version of dask, distributed, and dask-ml gpuci_logger "Install the master version of dask and distributed" set -x -pip install "git+https://github.com/dask/distributed.git@2022.7.1" --upgrade --no-deps -pip install "git+https://github.com/dask/dask.git@2022.7.1" --upgrade --no-deps +pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps +pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps set +x gpuci_logger "Check GPU usage" diff --git a/conda/environments/raft_dev_cuda11.0.yml b/conda/environments/raft_dev_cuda11.0.yml index e5355bc61c..004cb3c626 100644 --- a/conda/environments/raft_dev_cuda11.0.yml +++ b/conda/environments/raft_dev_cuda11.0.yml @@ -27,8 +27,8 @@ dependencies: - pip: - sphinx_markdown_tables - breathe - - git+https://github.com/dask/dask.git@2022.7.1 - - git+https://github.com/dask/distributed.git@2022.7.1 + - git+https://github.com/dask/dask.git@main + - git+https://github.com/dask/distributed.git@main # rapids-build-env, notebook-env and doc-env are defined in # https://docs.rapids.ai/maintainers/depmgmt/ diff --git a/conda/environments/raft_dev_cuda11.2.yml b/conda/environments/raft_dev_cuda11.2.yml index cdaf0ebcc4..b43df6e2be 100644 --- a/conda/environments/raft_dev_cuda11.2.yml +++ b/conda/environments/raft_dev_cuda11.2.yml @@ -27,8 +27,8 @@ dependencies: - pip: - sphinx_markdown_tables - breathe - - 
git+https://github.com/dask/dask.git@2022.7.1 - - git+https://github.com/dask/distributed.git@2022.7.1 + - git+https://github.com/dask/dask.git@main + - git+https://github.com/dask/distributed.git@main # rapids-build-env, notebook-env and doc-env are defined in # https://docs.rapids.ai/maintainers/depmgmt/ diff --git a/conda/environments/raft_dev_cuda11.4.yml b/conda/environments/raft_dev_cuda11.4.yml index c0d680a77a..fe76711b85 100644 --- a/conda/environments/raft_dev_cuda11.4.yml +++ b/conda/environments/raft_dev_cuda11.4.yml @@ -27,8 +27,8 @@ dependencies: - pip: - sphinx_markdown_tables - breathe - - git+https://github.com/dask/dask.git@2022.7.1 - - git+https://github.com/dask/distributed.git@2022.7.1 + - git+https://github.com/dask/dask.git@main + - git+https://github.com/dask/distributed.git@main # rapids-build-env, notebook-env and doc-env are defined in # https://docs.rapids.ai/maintainers/depmgmt/ diff --git a/conda/environments/raft_dev_cuda11.5.yml b/conda/environments/raft_dev_cuda11.5.yml index 04925b704b..f943de22cd 100644 --- a/conda/environments/raft_dev_cuda11.5.yml +++ b/conda/environments/raft_dev_cuda11.5.yml @@ -28,8 +28,8 @@ dependencies: - pip: - sphinx_markdown_tables - breathe - - git+https://github.com/dask/dask.git@2022.7.1 - - git+https://github.com/dask/distributed.git@2022.7.1 + - git+https://github.com/dask/dask.git@main + - git+https://github.com/dask/distributed.git@main # rapids-build-env, notebook-env and doc-env are defined in # https://docs.rapids.ai/maintainers/depmgmt/ diff --git a/conda/recipes/pyraft/meta.yaml b/conda/recipes/pyraft/meta.yaml index 7a2e8d6c49..025f96bf50 100644 --- a/conda/recipes/pyraft/meta.yaml +++ b/conda/recipes/pyraft/meta.yaml @@ -51,8 +51,8 @@ requirements: - ucx >={{ ucx_version }} - ucx-py {{ ucx_py_version }} - ucx-proc=*=gpu - - dask==2022.7.1 - - distributed==2022.7.1 + - dask>=2022.7.1 + - distributed>=2022.7.1 - cuda-python >=11.5,<11.7.1 - joblib >=0.11 - {{ pin_compatible('cudatoolkit', 
max_pin='x', min_pin='x') }} From 3b26f00407dcd3e8bddfed09a1fd3c09323cf142 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 9 Aug 2022 14:26:23 -0400 Subject: [PATCH 05/58] Use rapids-cmake 22.10 best practice for RAPIDS.cmake location (#784) Removes possibility of another project's `RAPIDS.cmake` being used, and removes the need to always download a version. Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/784 --- fetch_rapids.cmake | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index ba00380077..95c7765cfb 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -11,7 +11,9 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake - ${CMAKE_BINARY_DIR}/RAPIDS.cmake -) -include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake + ) +endif() +include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) From c9cce7200e8fe2a383216f74a2b316848cd6e2a5 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Thu, 11 Aug 2022 17:10:53 +0200 Subject: [PATCH 06/58] Fix adj_to_csr_kernel (#785) This PR adds launch bounds to adj_to_csr kernel, making it forward compatible with future architectures. 
Authors: - Allard Hendriksen (https://github.com/ahendriksen) Approvers: - William Hicks (https://github.com/wphicks) - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/785 --- cpp/bench/sparse/convert_csr.cu | 2 -- .../raft/sparse/convert/detail/adj_to_csr.cuh | 23 +++++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/cpp/bench/sparse/convert_csr.cu b/cpp/bench/sparse/convert_csr.cu index 0e701518ab..830fab13cc 100644 --- a/cpp/bench/sparse/convert_csr.cu +++ b/cpp/bench/sparse/convert_csr.cu @@ -14,8 +14,6 @@ * limitations under the License. */ -#include -#include #include #include diff --git a/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh index e55627c936..4728574b55 100644 --- a/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh +++ b/cpp/include/raft/sparse/convert/detail/adj_to_csr.cuh @@ -29,6 +29,9 @@ namespace sparse { namespace convert { namespace detail { +// Threads per block in adj_to_csr_kernel. +static const constexpr int adj_to_csr_tpb = 512; + /** * @brief Convert dense adjacency matrix into unsorted CSR format. * @@ -58,13 +61,14 @@ namespace detail { * the number of non-zeros in `adj`. 
*/ template -__global__ void adj_to_csr_kernel(const bool* adj, // row-major adjacency matrix - const index_t* row_ind, // precomputed row indices - index_t num_rows, // # rows of adj - index_t num_cols, // # cols of adj - index_t* row_counters, // pre-allocated (zeroed) atomic counters - index_t* out_col_ind // output column indices -) +__global__ void __launch_bounds__(adj_to_csr_tpb) + adj_to_csr_kernel(const bool* adj, // row-major adjacency matrix + const index_t* row_ind, // precomputed row indices + index_t num_rows, // # rows of adj + index_t num_cols, // # cols of adj + index_t* row_counters, // pre-allocated (zeroed) atomic counters + index_t* out_col_ind // output column indices + ) { const int chunk_size = 16; typedef raft::TxN_t chunk_bool; @@ -148,17 +152,16 @@ void adj_to_csr(const raft::handle_t& handle, // independently). If the maximum number of active blocks (num_sms * // occupancy) exceeds the number of rows, assign multiple blocks to a single // row. - int threads_per_block = 1024; int dev_id, sm_count, blocks_per_sm; cudaGetDevice(&dev_id); cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id); cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &blocks_per_sm, adj_to_csr_kernel, threads_per_block, 0); + &blocks_per_sm, adj_to_csr_kernel, adj_to_csr_tpb, 0); index_t max_active_blocks = sm_count * blocks_per_sm; index_t blocks_per_row = raft::ceildiv(max_active_blocks, num_rows); index_t grid_rows = raft::ceildiv(max_active_blocks, blocks_per_row); - dim3 block(threads_per_block, 1); + dim3 block(adj_to_csr_tpb, 1); dim3 grid(blocks_per_row, grid_rows); adj_to_csr_kernel From f891dc2bbe23e6d6db9ed9dff7df181c8b35d97d Mon Sep 17 00:00:00 2001 From: William Hicks Date: Tue, 23 Aug 2022 19:13:47 -0400 Subject: [PATCH 07/58] Remove type punning from TxN_t (#781) Avoid undefined behavior related to accessing inactive union members in TxN_t Authors: - William Hicks (https://github.com/wphicks) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) - Mark Hoemmen (https://github.com/mhoemmen) URL: https://github.com/rapidsai/raft/pull/781 --- .../raft/matrix/detail/linewise_op.cuh | 10 +++++----- cpp/include/raft/vectorized.cuh | 20 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/include/raft/matrix/detail/linewise_op.cuh b/cpp/include/raft/matrix/detail/linewise_op.cuh index 81204bfe66..ee703c5138 100644 --- a/cpp/include/raft/matrix/detail/linewise_op.cuh +++ b/cpp/include/raft/matrix/detail/linewise_op.cuh @@ -83,7 +83,7 @@ struct Linewise { Vec v, w; bool update = true; for (; in < in_end; in += AlignWarp::Value, out += AlignWarp::Value, rowMod += warpPad) { - v.val.internal = __ldcv(in); + *v.vectorized_data() = __ldcv(in); while (rowMod >= rowLen) { rowMod -= rowLen; rowDiv++; @@ -105,7 +105,7 @@ struct Linewise { int l = 0; w.val.data[k] = op(v.val.data[k], (std::ignore = vecs, args[l++])...); } - *out = w.val.internal; + *out = *w.vectorized_data(); } } @@ -138,11 +138,11 @@ struct Linewise { Vec v; const IdxType d = BlockSize * gridDim.x; for (IdxType i = threadIdx.x + blockIdx.x * BlockSize; i < len; i += d) { - v.val.internal = __ldcv(in + i); + *v.vectorized_data() = __ldcv(in + i); #pragma unroll VecElems for (int k = 0; k < VecElems; k++) v.val.data[k] = op(v.val.data[k], args.val.data[k]...); - __stwt(out + i, v.val.internal); + __stwt(out + i, *v.vectorized_data()); } } @@ -172,7 +172,7 @@ struct Linewise { __syncthreads(); { Vec out; - out.val.internal = reinterpret_cast(shm)[threadIdx.x]; + *out.vectorized_data() = reinterpret_cast(shm)[threadIdx.x]; return out; } } diff --git a/cpp/include/raft/vectorized.cuh b/cpp/include/raft/vectorized.cuh index 44c6a74162..b2e4940dc8 100644 --- a/cpp/include/raft/vectorized.cuh +++ b/cpp/include/raft/vectorized.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -269,13 +269,13 @@ struct TxN_t { /** defines the number of 'math_t' types stored by this struct */ static const int Ratio = veclen_; - union { + struct { /** the vectorized data that is used for subsequent operations */ math_t data[Ratio]; - /** internal data used to ensure vectorized loads/stores */ - io_t internal; } val; + __device__ auto* vectorized_data() { return reinterpret_cast(val.data); } + ///@todo: add default constructor /** @@ -310,22 +310,22 @@ struct TxN_t { template DI void load(const math_t* ptr, idx_t idx) { - const io_t* bptr = reinterpret_cast(&ptr[idx]); - val.internal = __ldg(bptr); + const io_t* bptr = reinterpret_cast(&ptr[idx]); + *vectorized_data() = __ldg(bptr); } template DI void load(math_t* ptr, idx_t idx) { - io_t* bptr = reinterpret_cast(&ptr[idx]); - val.internal = *bptr; + io_t* bptr = reinterpret_cast(&ptr[idx]); + *vectorized_data() = *bptr; } template DI void store(math_t* ptr, idx_t idx) { io_t* bptr = reinterpret_cast(&ptr[idx]); - *bptr = val.internal; + *bptr = *vectorized_data(); } /** @} */ }; @@ -336,7 +336,7 @@ struct TxN_t { typedef math_ math_t; static const int Ratio = 1; - union { + struct { math_t data[1]; } val; From 85bbbabd4d5ed4296e86e8cf84571f7544b8fc85 Mon Sep 17 00:00:00 2001 From: "Thejaswi. N. S" Date: Wed, 24 Aug 2022 18:00:47 +0200 Subject: [PATCH 08/58] Exposing python wrapper for the RMAT generator logic (#778) This PR adds a thin python wrapper for the RMAT generator logic (as per the request from @PiotrBigajNV) @cjnolet I was not sure what's the right way to wrap other such RAFT prims to python layer. Hence, I just followed the same path as that of `raft_distance` and created a `raft_random` build target instead. Let me know if you have a better approach for this one. Authors: - Thejaswi. N. S (https://github.com/teju85) - Corey J. 
Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Robert Maynard (https://github.com/robertmaynard) - Jordan Jacobelli (https://github.com/Ethyling) URL: https://github.com/rapidsai/raft/pull/778 --- build.sh | 4 +- ci/gpu/build.sh | 4 +- cpp/CMakeLists.txt | 9 + .../random/rmat_rectangular_generator.hpp | 42 +++++ cpp/src/random/specializations/common.cuh | 33 ++++ ...rmat_rectangular_generator_int64_double.cu | 23 +++ .../rmat_rectangular_generator_int64_float.cu | 23 +++ .../rmat_rectangular_generator_int_double.cu | 23 +++ .../rmat_rectangular_generator_int_float.cu | 23 +++ python/pylibraft/CMakeLists.txt | 7 +- .../pylibraft/distance/CMakeLists.txt | 7 +- .../pylibraft/pylibraft/random/CMakeLists.txt | 30 ++++ .../pylibraft/pylibraft/random/__init__.pxd | 14 ++ python/pylibraft/pylibraft/random/__init__.py | 16 ++ .../random/rmat_rectangular_generator.pyx | 170 ++++++++++++++++++ .../pylibraft/pylibraft/random/rng_state.pxd | 33 ++++ .../pylibraft/pylibraft/test/test_distance.py | 25 +-- .../pylibraft/pylibraft/test/test_random.py | 87 +++++++++ .../pylibraft/pylibraft/testing/__init__.py | 14 ++ python/pylibraft/pylibraft/testing/utils.py | 41 +++++ python/pylibraft/setup.py | 4 +- 21 files changed, 601 insertions(+), 31 deletions(-) create mode 100644 cpp/include/raft_distance/random/rmat_rectangular_generator.hpp create mode 100644 cpp/src/random/specializations/common.cuh create mode 100644 cpp/src/random/specializations/rmat_rectangular_generator_int64_double.cu create mode 100644 cpp/src/random/specializations/rmat_rectangular_generator_int64_float.cu create mode 100644 cpp/src/random/specializations/rmat_rectangular_generator_int_double.cu create mode 100644 cpp/src/random/specializations/rmat_rectangular_generator_int_float.cu create mode 100644 python/pylibraft/pylibraft/random/CMakeLists.txt create mode 100644 python/pylibraft/pylibraft/random/__init__.pxd create mode 100644 
python/pylibraft/pylibraft/random/__init__.py create mode 100644 python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx create mode 100644 python/pylibraft/pylibraft/random/rng_state.pxd create mode 100644 python/pylibraft/pylibraft/test/test_random.py create mode 100644 python/pylibraft/pylibraft/testing/__init__.py create mode 100644 python/pylibraft/pylibraft/testing/utils.py diff --git a/build.sh b/build.sh index 9eebdafe5c..ce4365985b 100755 --- a/build.sh +++ b/build.sh @@ -35,7 +35,9 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= + +#include +#include + +namespace raft::random::runtime { + +#define FUNC_DECL(IdxT, ProbT) \ + void rmat_rectangular_gen(raft::handle_t const& handle, \ + IdxT* out, \ + IdxT* out_src, \ + IdxT* out_dst, \ + const ProbT* theta, \ + IdxT r_scale, \ + IdxT c_scale, \ + IdxT n_edges, \ + raft::random::RngState& r) + +FUNC_DECL(int, float); +FUNC_DECL(int64_t, float); +FUNC_DECL(int, double); +FUNC_DECL(int64_t, double); + +#undef FUNC_DECL + +} // namespace raft::random::runtime diff --git a/cpp/src/random/specializations/common.cuh b/cpp/src/random/specializations/common.cuh new file mode 100644 index 0000000000..d854087714 --- /dev/null +++ b/cpp/src/random/specializations/common.cuh @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#define FUNC_DEF(IdxT, ProbT) \ + void rmat_rectangular_gen(raft::handle_t const& handle, \ + IdxT* out, \ + IdxT* out_src, \ + IdxT* out_dst, \ + const ProbT* theta, \ + IdxT r_scale, \ + IdxT c_scale, \ + IdxT n_edges, \ + raft::random::RngState& r) \ + { \ + raft::random::rmat_rectangular_gen( \ + out, out_src, out_dst, theta, r_scale, c_scale, n_edges, handle.get_stream(), r); \ + } diff --git a/cpp/src/random/specializations/rmat_rectangular_generator_int64_double.cu b/cpp/src/random/specializations/rmat_rectangular_generator_int64_double.cu new file mode 100644 index 0000000000..4985a64927 --- /dev/null +++ b/cpp/src/random/specializations/rmat_rectangular_generator_int64_double.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common.cuh" + +namespace raft::random::runtime { + +FUNC_DEF(int64_t, double); + +} // namespace raft::random::runtime diff --git a/cpp/src/random/specializations/rmat_rectangular_generator_int64_float.cu b/cpp/src/random/specializations/rmat_rectangular_generator_int64_float.cu new file mode 100644 index 0000000000..f42e039bae --- /dev/null +++ b/cpp/src/random/specializations/rmat_rectangular_generator_int64_float.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common.cuh" + +namespace raft::random::runtime { + +FUNC_DEF(int64_t, float); + +} // namespace raft::random::runtime diff --git a/cpp/src/random/specializations/rmat_rectangular_generator_int_double.cu b/cpp/src/random/specializations/rmat_rectangular_generator_int_double.cu new file mode 100644 index 0000000000..c29d140f09 --- /dev/null +++ b/cpp/src/random/specializations/rmat_rectangular_generator_int_double.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common.cuh" + +namespace raft::random::runtime { + +FUNC_DEF(int, double); + +} // namespace raft::random::runtime diff --git a/cpp/src/random/specializations/rmat_rectangular_generator_int_float.cu b/cpp/src/random/specializations/rmat_rectangular_generator_int_float.cu new file mode 100644 index 0000000000..b07ede6a58 --- /dev/null +++ b/cpp/src/random/specializations/rmat_rectangular_generator_int_float.cu @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common.cuh" + +namespace raft::random::runtime { + +FUNC_DEF(int, float); + +} // namespace raft::random::runtime diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index c80a609beb..5f877187eb 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -35,7 +35,7 @@ option(FIND_RAFT_CPP "Search for existing RAFT C++ installations before defaulti if(FIND_RAFT_CPP) find_package(raft ${pylibraft_version} REQUIRED COMPONENTS distance) if(NOT TARGET raft::raft_distance_lib) - message(FATAL_ERROR "Building against a preexisting libraft library requires the distance component of that library to have been built!") + message(FATAL_ERROR "Building against a preexisting libraft library requires the distance components of that library to have been built!") endif() else() set(raft_FOUND OFF) @@ -59,11 +59,14 @@ if(NOT raft_FOUND) add_subdirectory(../../cpp raft-cpp) # When building the C++ libraries from source we must copy - # libraft_distance.so alongside the pairwise_distance Cython library. 
+ # libraft_distance.so alongside the pairwise_distance and random Cython libraries + # TODO: when we have a single 'compiled' raft library, we shouldn't need this install(TARGETS raft_distance_lib DESTINATION pylibraft/distance) + install(TARGETS raft_distance_lib DESTINATION pylibraft/random) endif() include(rapids-cython) rapids_cython_init() add_subdirectory(pylibraft/distance) +add_subdirectory(pylibraft/random) diff --git a/python/pylibraft/pylibraft/distance/CMakeLists.txt b/python/pylibraft/pylibraft/distance/CMakeLists.txt index f8a7faf450..707ea737b3 100644 --- a/python/pylibraft/pylibraft/distance/CMakeLists.txt +++ b/python/pylibraft/pylibraft/distance/CMakeLists.txt @@ -18,6 +18,11 @@ set(linked_libraries raft::raft raft::distance) # Build all of the Cython targets rapids_cython_create_modules( + CXX SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" - CXX) + MODULE_PREFIX distance_) + +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") +endforeach() diff --git a/python/pylibraft/pylibraft/random/CMakeLists.txt b/python/pylibraft/pylibraft/random/CMakeLists.txt new file mode 100644 index 0000000000..bc7357bb8a --- /dev/null +++ b/python/pylibraft/pylibraft/random/CMakeLists.txt @@ -0,0 +1,30 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources rmat_rectangular_generator.pyx) +# TODO: should finally be replaced with 'compiled' library to be more generic, +# when that is available +set(linked_libraries raft::raft raft::distance) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX random_) + +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") +endforeach() diff --git a/python/pylibraft/pylibraft/random/__init__.pxd b/python/pylibraft/pylibraft/random/__init__.pxd new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/random/__init__.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/random/__init__.py b/python/pylibraft/pylibraft/random/__init__.py new file mode 100644 index 0000000000..c34e4e6bdb --- /dev/null +++ b/python/pylibraft/pylibraft/random/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .rmat_rectangular_generator import rmat diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx new file mode 100644 index 0000000000..ea28357201 --- /dev/null +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -0,0 +1,170 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +from libc.stdint cimport uintptr_t, int64_t +from cython.operator cimport dereference as deref +from pylibraft.common.handle cimport handle_t +from .rng_state cimport RngState + +from libcpp cimport bool + + +cdef extern from "raft_distance/random/rmat_rectangular_generator.hpp" \ + namespace "raft::random::runtime": + + cdef void rmat_rectangular_gen(const handle_t &handle, + int* out, + int* out_src, + int* out_dst, + const float* theta, + int r_scale, + int c_scale, + int n_edges, + RngState& r) + + cdef void rmat_rectangular_gen(const handle_t &handle, + int64_t* out, + int64_t* out_src, + int64_t* out_dst, + const float* theta, + int64_t r_scale, + int64_t c_scale, + int64_t n_edges, + RngState& r) + + cdef void rmat_rectangular_gen(const handle_t &handle, + int* out, + int* out_src, + int* out_dst, + const double* theta, + int r_scale, + int c_scale, + int n_edges, + RngState& r) + + cdef void rmat_rectangular_gen(const handle_t &handle, + int64_t* out, + int64_t* out_src, + int64_t* out_dst, + const double* theta, + int64_t r_scale, + int64_t c_scale, + int64_t n_edges, + RngState& r) + + +def rmat(out, theta, r_scale, c_scale, seed=12345): + """ + Generate RMAT adjacency list based on the input distribution. + + Parameters + ---------- + + out: CUDA array interface compliant matrix shape (n_edges, 2). This will + contain the src/dst node ids stored consecutively like a pair. + theta: CUDA array interface compliant matrix shape + (max(r_scale, c_scale) * 4) This stores the probability distribution + at each RMAT level + r_scale: log2 of number of source nodes + c_scale: log2 of number of destination nodes + seed: random seed used for reproducibility + + Examples + -------- + + .. 
code-block:: python + + import cupy as cp + + from pylibraft.random import rmat + + n_edges = 5000 + r_scale = 16 + c_scale = 14 + theta_len = max(r_scale, c_scale) * 4 + + out = cp.empty((n_edges, 2), dtype=cp.int32) + theta = cp.random.random_sample(theta_len, dtype=cp.float32) + + rmat(out, theta, r_scale, c_scale) + """ + + if theta is None: + raise Exception("'theta' cannot be None!") + if out is None: + raise Exception("'out' cannot be None!") + + out_cai = out.__cuda_array_interface__ + theta_cai = theta.__cuda_array_interface__ + + n_edges = out_cai["shape"][0] + out_ptr = out_cai["data"][0] + theta_ptr = theta_cai["data"][0] + out_dt = np.dtype(out_cai["typestr"]) + theta_dt = np.dtype(theta_cai["typestr"]) + + cdef RngState *rng = new RngState(seed) + cdef handle_t *h = new handle_t() + + if out_dt == np.int32 and theta_dt == np.float32: + rmat_rectangular_gen(deref(h), + out_ptr, + NULL, + NULL, + theta_ptr, + r_scale, + c_scale, + n_edges, + deref(rng)) + elif out_dt == np.int64 and theta_dt == np.float32: + rmat_rectangular_gen(deref(h), + out_ptr, + NULL, + NULL, + theta_ptr, + r_scale, + c_scale, + n_edges, + deref(rng)) + elif out_dt == np.int32 and theta_dt == np.float64: + rmat_rectangular_gen(deref(h), + out_ptr, + NULL, + NULL, + theta_ptr, + r_scale, + c_scale, + n_edges, + deref(rng)) + elif out_dt == np.int64 and theta_dt == np.float64: + rmat_rectangular_gen(deref(h), + out_ptr, + NULL, + NULL, + theta_ptr, + r_scale, + c_scale, + n_edges, + deref(rng)) + else: + raise ValueError("dtype out=%s and theta=%s not supported" % + (out_dt, theta_dt)) diff --git a/python/pylibraft/pylibraft/random/rng_state.pxd b/python/pylibraft/pylibraft/random/rng_state.pxd new file mode 100644 index 0000000000..9d1e2d17e3 --- /dev/null +++ b/python/pylibraft/pylibraft/random/rng_state.pxd @@ -0,0 +1,33 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from libc.stdint cimport uint64_t + +cdef extern from "raft/random/rng_state.hpp" namespace "raft::random" nogil: + + ctypedef enum GeneratorType: + GenPhilox "raft::random::GeneratorType::GenPhilox" + GenPC "raft::random::GeneratorType::GenPC" + + cdef cppclass RngState: + RngState(uint64_t seed) except + + uint64_t seed + uint64_t base_subsequence + GeneratorType type diff --git a/python/pylibraft/pylibraft/test/test_distance.py b/python/pylibraft/pylibraft/test/test_distance.py index d4f73ecf2b..b9b4ba9e30 100644 --- a/python/pylibraft/pylibraft/test/test_distance.py +++ b/python/pylibraft/pylibraft/test/test_distance.py @@ -17,32 +17,9 @@ import pytest import numpy as np -import rmm - from pylibraft.distance import pairwise_distance - -class TestDeviceBuffer: - - def __init__(self, ndarray, order): - self.ndarray_ = ndarray - self.device_buffer_ = \ - rmm.DeviceBuffer.to_device(ndarray.ravel(order=order).tobytes()) - - @property - def __cuda_array_interface__(self): - device_cai = self.device_buffer_.__cuda_array_interface__ - host_cai = self.ndarray_.__array_interface__.copy() - host_cai["data"] = (device_cai["data"][0], device_cai["data"][1]) - - return host_cai - - def copy_to_host(self): - return np.frombuffer(self.device_buffer_.tobytes(), - dtype=self.ndarray_.dtype, - like=self.ndarray_)\ - 
.astype(self.ndarray_.dtype)\ - .reshape(self.ndarray_.shape) +from pylibraft.testing.utils import TestDeviceBuffer @pytest.mark.parametrize("n_rows", [100]) diff --git a/python/pylibraft/pylibraft/test/test_random.py b/python/pylibraft/pylibraft/test/test_random.py new file mode 100644 index 0000000000..8a04f707de --- /dev/null +++ b/python/pylibraft/pylibraft/test/test_random.py @@ -0,0 +1,87 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +import numpy as np + +from pylibraft.random import rmat + +from pylibraft.testing.utils import TestDeviceBuffer + + +def generate_theta(r_scale, c_scale): + max_scale = max(r_scale, c_scale) + theta = np.random.random_sample(max_scale * 4) + for i in range(max_scale): + a = theta[4 * i] + b = theta[4 * i + 1] + c = theta[4 * i + 2] + d = theta[4 * i + 3] + total = a + b + c + d + theta[4 * i] = a / total + theta[4 * i + 1] = b / total + theta[4 * i + 2] = c / total + theta[4 * i + 3] = d / total + theta_device = TestDeviceBuffer(theta, "C") + return theta, theta_device + + +@pytest.mark.parametrize("n_edges", [10000, 20000]) +@pytest.mark.parametrize("r_scale", [16, 18]) +@pytest.mark.parametrize("c_scale", [16, 18]) +@pytest.mark.parametrize("dtype", [np.int32, np.int64]) +def test_rmat(n_edges, r_scale, c_scale, dtype): + theta, theta_device = generate_theta(r_scale, c_scale) + out_buff = np.empty((n_edges, 2), dtype=dtype) + output_device = 
TestDeviceBuffer(out_buff, "C") + rmat(output_device, theta_device, r_scale, c_scale, 12345) + output = output_device.copy_to_host() + # more rigorous tests have been done at the C++ level + assert np.all(output[:, 0] >= 0) + assert np.all(output[:, 0] < 2**r_scale) + assert np.all(output[:, 1] >= 0) + assert np.all(output[:, 1] < 2**c_scale) + rmat(output_device, theta_device, r_scale, c_scale, 12345) + output1 = output_device.copy_to_host() + assert np.all(np.equal(output, output1)) + + +def test_rmat_exception(): + n_edges = 20000 + r_scale = c_scale = 16 + dtype = np.int32 + with pytest.raises(Exception) as exception: + out_buff = np.empty((n_edges, 2), dtype=dtype) + output_device = TestDeviceBuffer(out_buff, "C") + rmat(output_device, None, r_scale, c_scale, 12345) + assert exception is not None + assert exception.message == "'theta' cannot be None!" + with pytest.raises(Exception) as exception: + theta, theta_device = generate_theta(r_scale, c_scale) + rmat(None, theta_device, r_scale, c_scale, 12345) + assert exception is not None + assert exception.message == "'out' cannot be None!" + + +def test_rmat_valueerror(): + n_edges = 20000 + r_scale = c_scale = 16 + with pytest.raises(ValueError) as exception: + out_buff = np.empty((n_edges, 2), dtype=np.int16) + output_device = TestDeviceBuffer(out_buff, "C") + theta, theta_device = generate_theta(r_scale, c_scale) + rmat(output_device, theta_device, r_scale, c_scale, 12345) + assert exception is not None + assert "not supported" in exception.message diff --git a/python/pylibraft/pylibraft/testing/__init__.py b/python/pylibraft/pylibraft/testing/__init__.py new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/testing/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/testing/utils.py b/python/pylibraft/pylibraft/testing/utils.py new file mode 100644 index 0000000000..53115e991c --- /dev/null +++ b/python/pylibraft/pylibraft/testing/utils.py @@ -0,0 +1,41 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np + +import rmm + + +class TestDeviceBuffer: + + def __init__(self, ndarray, order): + self.ndarray_ = ndarray + self.device_buffer_ = \ + rmm.DeviceBuffer.to_device(ndarray.ravel(order=order).tobytes()) + + @property + def __cuda_array_interface__(self): + device_cai = self.device_buffer_.__cuda_array_interface__ + host_cai = self.ndarray_.__array_interface__.copy() + host_cai["data"] = (device_cai["data"][0], device_cai["data"][1]) + + return host_cai + + def copy_to_host(self): + return np.frombuffer(self.device_buffer_.tobytes(), + dtype=self.ndarray_.dtype, + like=self.ndarray_)\ + .astype(self.ndarray_.dtype)\ + .reshape(self.ndarray_.shape) diff --git a/python/pylibraft/setup.py b/python/pylibraft/setup.py index 6b4e5ddf69..561c105d36 100644 --- a/python/pylibraft/setup.py +++ b/python/pylibraft/setup.py @@ -40,7 +40,9 @@ "pylibraft.distance", "pylibraft.distance.includes", "pylibraft.common", - "pylibraft.common.includes" + "pylibraft.common.includes", + "pylibraft.random", + "pylibraft.random.includes" ] ) }, From ab9a6951285f372a94ee09b08deb97f03de09e60 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 25 Aug 2022 01:14:20 +0200 Subject: [PATCH 09/58] ivf_flat::index: hide implementation details (#747) Hide the mutable `mdarray` members of `ivf_flat::index` behind immutable `mdspan` views. Authors: - Artem M. Chirkin (https://github.com/achirkin) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/747 --- .../knn/detail/ann_kmeans_balanced.cuh | 13 +- .../raft/spatial/knn/detail/ann_utils.cuh | 18 +- .../spatial/knn/detail/ivf_flat_build.cuh | 146 ++++------- .../spatial/knn/detail/ivf_flat_search.cuh | 75 +++--- .../raft/spatial/knn/detail/processing.cuh | 2 +- cpp/include/raft/spatial/knn/ivf_flat.cuh | 53 +++- .../raft/spatial/knn/ivf_flat_types.hpp | 246 +++++++++++++----- cpp/test/spatial/ann_ivf_flat.cu | 12 +- 8 files changed, 347 insertions(+), 218 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh b/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh index 74e1ae75a8..fca5d05465 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh @@ -131,9 +131,16 @@ constexpr auto calc_minibatch_size(uint32_t n_clusters, size_t n_rows) -> uint32 /** * @brief Given the data and labels, calculate cluster centers and sizes in one sweep. * - * Let S_i = {x_k | x_k \in dataset & labels[k] == i} be the vectors in the dataset with label i. - * On exit centers_i = normalize(\sum_{x \in S_i} x), where `normalize` depends on the distance - * type. + * Let `S_i = {x_k | x_k \in dataset & labels[k] == i}` be the vectors in the dataset with label i. + * + * On exit, + * `centers_i = (\sum_{x \in S_i} x + w_i * center_i) / (|S_i| + w_i)`, + * where `w_i = reset_counters ? 0 : cluster_size[i]`. + * + * In other words, the updated cluster centers are a weighted average of the existing cluster + * center, and the coordinates of the points labeled with i. _This allows calling this function + * multiple times with different datasets with the same effect as if calling this function once + * on the combined dataset_. * * NB: `centers` and `cluster_sizes` must be accessible on GPU due to * divide_along_rows/normalize_rows. 
The rest can be both, under assumption that all pointers are diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index e789bafde2..d4bce1fdf4 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -368,8 +368,8 @@ __global__ void map_along_rows_kernel( } /** - * @brief Divide matrix values along rows by an integer value, skipping rows if the corresponding - * divisor is zero. + * @brief Map a binary function over a matrix and a vector element-wise, broadcasting the vector + * values along rows: `m[i, j] = op(m[i,j], v[i])` * * NB: device-only function * @@ -377,16 +377,16 @@ __global__ void map_along_rows_kernel( * * @param n_rows * @param n_cols - * @param[inout] a device pointer to a row-major matrix [n_rows, n_cols] - * @param[in] d device pointer to a vector [n_rows] - * @param map the binary operation to apply on every element of matrix rows and of the vector + * @param[inout] m device pointer to a row-major matrix [n_rows, n_cols] + * @param[in] v device pointer to a vector [n_rows] + * @param op the binary operation to apply on every element of matrix rows and of the vector */ template inline void map_along_rows(uint32_t n_rows, uint32_t n_cols, - float* a, - const uint32_t* d, - Lambda map, + float* m, + const uint32_t* v, + Lambda op, rmm::cuda_stream_view stream) { dim3 threads(128, 1, 1); @@ -394,7 +394,7 @@ inline void map_along_rows(uint32_t n_rows, ceildiv(static_cast(n_rows) * static_cast(n_cols), threads.x), 1, 1); - map_along_rows_kernel<<>>(n_rows, n_cols, a, d, map); + map_along_rows_kernel<<>>(n_rows, n_cols, m, v, op); } template diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index 96af5c9522..2798e23768 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ 
-108,39 +108,38 @@ inline auto extend(const handle_t& handle, const index& orig_index, const T* new_vectors, const IdxT* new_indices, - IdxT n_rows, - rmm::cuda_stream_view stream) -> index + IdxT n_rows) -> index { - auto n_lists = orig_index.n_lists; - auto dim = orig_index.dim; + auto stream = handle.get_stream(); + auto n_lists = orig_index.n_lists(); + auto dim = orig_index.dim(); common::nvtx::range fun_scope( "ivf_flat::extend(%zu, %u)", size_t(n_rows), dim); - RAFT_EXPECTS(new_indices != nullptr || orig_index.size == 0, + RAFT_EXPECTS(new_indices != nullptr || orig_index.size() == 0, "You must pass data indices when the index is non-empty."); rmm::device_uvector new_labels(n_rows, stream); kmeans::predict(handle, - orig_index.centers.data(), + orig_index.centers().data_handle(), n_lists, dim, new_vectors, n_rows, new_labels.data(), - orig_index.metric, + orig_index.metric(), stream); - auto&& list_sizes = rmm::device_uvector(n_lists, stream); - auto&& list_offsets = rmm::device_uvector(n_lists + 1, stream); - auto list_sizes_ptr = list_sizes.data(); - auto list_offsets_ptr = list_offsets.data(); + index ext_index(handle, orig_index.metric(), n_lists, dim); - auto&& centers = rmm::device_uvector(size_t(n_lists) * size_t(dim), stream); - auto centers_ptr = centers.data(); + auto list_sizes_ptr = ext_index.list_sizes().data_handle(); + auto list_offsets_ptr = ext_index.list_offsets().data_handle(); + auto centers_ptr = ext_index.centers().data_handle(); // Calculate the centers and sizes on the new data, starting from the original values - raft::copy(centers_ptr, orig_index.centers.data(), centers.size(), stream); - raft::copy(list_sizes_ptr, orig_index.list_sizes.data(), list_sizes.size(), stream); + raft::copy(centers_ptr, orig_index.centers().data_handle(), ext_index.centers().size(), stream); + raft::copy( + list_sizes_ptr, orig_index.list_sizes().data_handle(), ext_index.list_sizes().size(), stream); kmeans::calc_centers_and_sizes(centers_ptr, 
list_sizes_ptr, @@ -160,35 +159,36 @@ inline auto extend(const handle_t& handle, list_sizes_ptr, list_sizes_ptr + n_lists, list_offsets_ptr + 1, - [] __device__(IdxT s, uint32_t l) { return s + Pow2::roundUp(l); }); + [] __device__(IdxT s, uint32_t l) { return s + Pow2::roundUp(l); }); update_host(&index_size, list_offsets_ptr + n_lists, 1, stream); handle.sync_stream(stream); - auto&& data = rmm::device_uvector(index_size * IdxT(dim), stream); - auto&& indices = rmm::device_uvector(index_size, stream); + ext_index.allocate( + handle, index_size, ext_index.metric() == raft::distance::DistanceType::L2Expanded); // Populate index with the old data - if (orig_index.size > 0) { - utils::block_copy(orig_index.list_offsets.data(), + if (orig_index.size() > 0) { + utils::block_copy(orig_index.list_offsets().data_handle(), list_offsets_ptr, IdxT(n_lists), - orig_index.data.data(), - data.data(), + orig_index.data().data_handle(), + ext_index.data().data_handle(), IdxT(dim), stream); - utils::block_copy(orig_index.list_offsets.data(), + utils::block_copy(orig_index.list_offsets().data_handle(), list_offsets_ptr, IdxT(n_lists), - orig_index.indices.data(), - indices.data(), + orig_index.indices().data_handle(), + ext_index.indices().data_handle(), IdxT(1), stream); } // Copy the old sizes, so we can start from the current state of the index; // we'll rebuild the `list_sizes_ptr` in the following kernel, using it as an atomic counter. 
- raft::copy(list_sizes_ptr, orig_index.list_sizes.data(), list_sizes.size(), stream); + raft::copy( + list_sizes_ptr, orig_index.list_sizes().data_handle(), ext_index.list_sizes().size(), stream); const dim3 block_dim(256); const dim3 grid_dim(raft::ceildiv(n_rows, block_dim.x)); @@ -196,70 +196,44 @@ inline auto extend(const handle_t& handle, list_offsets_ptr, new_vectors, new_indices, - data.data(), - indices.data(), + ext_index.data().data_handle(), + ext_index.indices().data_handle(), list_sizes_ptr, n_rows, dim, - orig_index.veclen); + ext_index.veclen()); RAFT_CUDA_TRY(cudaPeekAtLastError()); // Precompute the centers vector norms for L2Expanded distance - auto compute_norms = [&]() { - auto&& r = rmm::device_uvector(n_lists, stream); - utils::dots_along_rows(n_lists, dim, centers.data(), r.data(), stream); - RAFT_LOG_TRACE_VEC(r.data(), 20); - return std::move(r); - }; - auto&& center_norms = orig_index.metric == raft::distance::DistanceType::L2Expanded - ? std::optional(compute_norms()) - : std::nullopt; + if (ext_index.center_norms().has_value()) { + utils::dots_along_rows(n_lists, + dim, + ext_index.centers().data_handle(), + ext_index.center_norms()->data_handle(), + stream); + RAFT_LOG_TRACE_VEC(ext_index.center_norms()->data_handle(), std::min(dim, 20)); + } // assemble the index - index new_index{{}, - orig_index.veclen, - orig_index.metric, - index_size, - orig_index.dim, - orig_index.n_lists, - std::move(data), - std::move(indices), - std::move(list_sizes), - std::move(list_offsets), - std::move(centers), - std::move(center_norms)}; - - // check index invariants - new_index.check_consistency(); - - return new_index; + return ext_index; } /** See raft::spatial::knn::ivf_flat::build docs */ template -inline auto build(const handle_t& handle, - const index_params& params, - const T* dataset, - IdxT n_rows, - uint32_t dim, - rmm::cuda_stream_view stream) -> index +inline auto build( + const handle_t& handle, const index_params& params, const T* 
dataset, IdxT n_rows, uint32_t dim) + -> index { + auto stream = handle.get_stream(); common::nvtx::range fun_scope( "ivf_flat::build(%zu, %u)", size_t(n_rows), dim); static_assert(std::is_same_v || std::is_same_v || std::is_same_v, "unsupported data type"); RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset"); - // TODO: consider padding the dimensions and fixing veclen to its maximum possible value as a - // template parameter (https://github.com/rapidsai/raft/issues/711) - uint32_t veclen = 16 / sizeof(T); - while (dim % veclen != 0) { - veclen = veclen >> 1; - } - auto n_lists = static_cast(params.n_lists); - - // kmeans cluster ids for the dataset - auto&& centers = rmm::device_uvector(size_t(n_lists) * size_t(dim), stream); + index index(handle, params, dim); + utils::memzero(index.list_sizes().data_handle(), index.list_sizes().size(), stream); + utils::memzero(index.list_offsets().data_handle(), index.list_offsets().size(), stream); // Predict labels of the whole dataset kmeans::build_optimized_kmeans(handle, @@ -267,39 +241,15 @@ inline auto build(const handle_t& handle, dim, dataset, n_rows, - centers.data(), - n_lists, + index.centers().data_handle(), + params.n_lists, params.kmeans_trainset_fraction, params.metric, stream); - auto&& data = rmm::device_uvector(0, stream); - auto&& indices = rmm::device_uvector(0, stream); - auto&& list_sizes = rmm::device_uvector(n_lists, stream); - auto&& list_offsets = rmm::device_uvector(n_lists + 1, stream); - utils::memzero(list_sizes.data(), list_sizes.size(), stream); - utils::memzero(list_offsets.data(), list_offsets.size(), stream); - - // assemble the index - index index{{}, - veclen, - params.metric, - IdxT(0), - dim, - n_lists, - std::move(data), - std::move(indices), - std::move(list_sizes), - std::move(list_offsets), - std::move(centers), - std::nullopt}; - - // check index invariants - index.check_consistency(); - // add the data if necessary if (params.add_data_on_build) { - return extend(handle, index, 
dataset, nullptr, n_rows, stream); + return detail::extend(handle, index, dataset, nullptr, n_rows); } else { return index; } diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh index a52fbc69de..fab845396c 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh @@ -35,8 +35,6 @@ #include #include -#include - namespace raft::spatial::knn::ivf_flat::detail { using namespace raft::spatial::knn::detail; // NOLINT @@ -819,13 +817,13 @@ void launch_kernel(Lambda lambda, uint32_t& grid_dim_x, rmm::cuda_stream_view stream) { - RAFT_EXPECTS(Veclen == index.veclen, + RAFT_EXPECTS(Veclen == index.veclen(), "Configured Veclen does not match the index interleaving pattern."); constexpr auto kKernel = interleaved_scan_kernel; const int max_query_smem = 16384; int query_smem_elems = - std::min(max_query_smem / sizeof(T), Pow2::roundUp(index.dim)); + std::min(max_query_smem / sizeof(T), Pow2::roundUp(index.dim())); int smem_size = query_smem_elems * sizeof(T); constexpr int kSubwarpSize = std::min(Capacity, WarpSize); smem_size += raft::spatial::knn::detail::topk::calc_smem_size_for_block_wide( @@ -855,16 +853,16 @@ void launch_kernel(Lambda lambda, query_smem_elems, queries, coarse_index, - index.indices.data(), - index.data.data(), - index.list_sizes.data(), - index.list_offsets.data(), + index.indices().data_handle(), + index.data().data_handle(), + index.list_sizes().data_handle(), + index.list_offsets().data_handle(), n_probes, k, - index.dim, + index.dim(), neighbors, distances); - queries += grid_dim_y * index.dim; + queries += grid_dim_y * index.dim(); neighbors += grid_dim_y * grid_dim_x * k; distances += grid_dim_y * grid_dim_x * k; } @@ -1041,7 +1039,7 @@ void ivfflat_interleaved_scan(const ivf_flat::index& index, { const int capacity = raft::spatial::knn::detail::topk::calc_capacity(k); 
select_interleaved_scan_kernel::run(capacity, - index.veclen, + index.veclen(), select_min, metric, index, @@ -1066,13 +1064,13 @@ void search_impl(const handle_t& handle, bool select_min, IdxT* neighbors, AccT* distances, - rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* search_mr) { + auto stream = handle.get_stream(); // The norm of query rmm::device_uvector query_norm_dev(n_queries, stream, search_mr); // The distance value of cluster(list) and queries - rmm::device_uvector distance_buffer_dev(n_queries * index.n_lists, stream, search_mr); + rmm::device_uvector distance_buffer_dev(n_queries * index.n_lists(), stream, search_mr); // The topk distance value of cluster(list) and queries rmm::device_uvector coarse_distances_dev(n_queries * n_probes, stream, search_mr); // The topk index of cluster(list) and queries @@ -1084,7 +1082,7 @@ void search_impl(const handle_t& handle, size_t float_query_size; if constexpr (std::is_integral_v) { - float_query_size = n_queries * index.dim; + float_query_size = n_queries * index.dim(); } else { float_query_size = 0; } @@ -1095,25 +1093,25 @@ void search_impl(const handle_t& handle, converted_queries_ptr = const_cast(queries); } else { linalg::unaryOp( - converted_queries_ptr, queries, n_queries * index.dim, utils::mapping{}, stream); + converted_queries_ptr, queries, n_queries * index.dim(), utils::mapping{}, stream); } float alpha = 1.0f; float beta = 0.0f; - if (index.metric == raft::distance::DistanceType::L2Expanded) { + if (index.metric() == raft::distance::DistanceType::L2Expanded) { alpha = -2.0f; beta = 1.0f; utils::dots_along_rows( - n_queries, index.dim, converted_queries_ptr, query_norm_dev.data(), stream); + n_queries, index.dim(), converted_queries_ptr, query_norm_dev.data(), stream); utils::outer_add(query_norm_dev.data(), n_queries, - index.center_norms->data(), - index.n_lists, + index.center_norms()->data_handle(), + index.n_lists(), distance_buffer_dev.data(), stream); - 
RAFT_LOG_TRACE_VEC(index.center_norms->data(), 20); - RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), 20); + RAFT_LOG_TRACE_VEC(index.center_norms()->data_handle(), std::min(20, index.dim())); + RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min(20, index.n_lists())); } else { alpha = 1.0f; beta = 0.0f; @@ -1122,25 +1120,25 @@ void search_impl(const handle_t& handle, linalg::gemm(handle, true, false, - index.n_lists, + index.n_lists(), n_queries, - index.dim, + index.dim(), &alpha, - index.centers.data(), - index.dim, + index.centers().data_handle(), + index.dim(), converted_queries_ptr, - index.dim, + index.dim(), &beta, distance_buffer_dev.data(), - index.n_lists, + index.n_lists(), stream); - RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), 20); + RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min(20, index.n_lists())); if (n_probes <= raft::spatial::knn::detail::topk::kMaxCapacity) { topk::warp_sort_topk(distance_buffer_dev.data(), nullptr, n_queries, - index.n_lists, + index.n_lists(), n_probes, coarse_distances_dev.data(), coarse_indices_dev.data(), @@ -1151,7 +1149,7 @@ void search_impl(const handle_t& handle, topk::radix_topk(distance_buffer_dev.data(), nullptr, n_queries, - index.n_lists, + index.n_lists(), n_probes, coarse_distances_dev.data(), coarse_indices_dev.data(), @@ -1159,8 +1157,8 @@ void search_impl(const handle_t& handle, stream, search_mr); } - RAFT_LOG_TRACE_VEC(coarse_indices_dev.data(), 1 * n_probes); - RAFT_LOG_TRACE_VEC(coarse_distances_dev.data(), 1 * n_probes); + RAFT_LOG_TRACE_VEC(coarse_indices_dev.data(), n_probes); + RAFT_LOG_TRACE_VEC(coarse_distances_dev.data(), n_probes); auto distances_dev_ptr = refined_distances_dev.data(); auto indices_dev_ptr = refined_indices_dev.data(); @@ -1172,7 +1170,7 @@ void search_impl(const handle_t& handle, nullptr, nullptr, n_queries, - index.metric, + index.metric(), n_probes, k, select_min, @@ -1193,7 +1191,7 @@ void search_impl(const handle_t& handle, queries, coarse_indices_dev.data(), 
n_queries, - index.metric, + index.metric(), n_probes, k, select_min, @@ -1245,18 +1243,17 @@ inline void search(const handle_t& handle, uint32_t k, IdxT* neighbors, float* distances, - rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = nullptr) { common::nvtx::range fun_scope( - "ivf_flat::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, index.dim); + "ivf_flat::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, index.dim()); RAFT_EXPECTS(params.n_probes > 0, "n_probes (number of clusters to probe in the search) must be positive."); - auto n_probes = std::min(params.n_probes, index.n_lists); + auto n_probes = std::min(params.n_probes, index.n_lists()); bool select_min; - switch (index.metric) { + switch (index.metric()) { case raft::distance::DistanceType::InnerProduct: case raft::distance::DistanceType::CosineExpanded: case raft::distance::DistanceType::CorrelationExpanded: @@ -1275,7 +1272,7 @@ inline void search(const handle_t& handle, } return search_impl( - handle, index, queries, n_queries, k, n_probes, select_min, neighbors, distances, stream, mr); + handle, index, queries, n_queries, k, n_probes, select_min, neighbors, distances, mr); } } // namespace raft::spatial::knn::ivf_flat::detail diff --git a/cpp/include/raft/spatial/knn/detail/processing.cuh b/cpp/include/raft/spatial/knn/detail/processing.cuh index 79c437b020..a88b55e803 100644 --- a/cpp/include/raft/spatial/knn/detail/processing.cuh +++ b/cpp/include/raft/spatial/knn/detail/processing.cuh @@ -93,7 +93,7 @@ class CosineMetricProcessor : public MetricProcessor { data, data, k_ * n_rows_, [] __device__(math_t in) { return 1 - in; }, stream_); } - virtual void set_num_queries(int k) { k_ = k; } + void set_num_queries(int k) override { k_ = k; } ~CosineMetricProcessor() = default; }; diff --git a/cpp/include/raft/spatial/knn/ivf_flat.cuh b/cpp/include/raft/spatial/knn/ivf_flat.cuh index 98cccd64df..09bd8edd85 100644 --- a/cpp/include/raft/spatial/knn/ivf_flat.cuh +++ 
b/cpp/include/raft/spatial/knn/ivf_flat.cuh @@ -64,8 +64,7 @@ inline auto build( const handle_t& handle, const index_params& params, const T* dataset, IdxT n_rows, uint32_t dim) -> index { - return raft::spatial::knn::ivf_flat::detail::build( - handle, params, dataset, n_rows, dim, handle.get_stream()); + return raft::spatial::knn::ivf_flat::detail::build(handle, params, dataset, n_rows, dim); } /** @@ -108,7 +107,31 @@ inline auto extend(const handle_t& handle, IdxT n_rows) -> index { return raft::spatial::knn::ivf_flat::detail::extend( - handle, orig_index, new_vectors, new_indices, n_rows, handle.get_stream()); + handle, orig_index, new_vectors, new_indices, n_rows); +} + +/** + * @brief Extend the index with the new data. + * * + * @tparam T data element type + * @tparam IdxT type of the indices in the source dataset + * + * @param handle + * @param[inout] index + * @param[in] new_vectors a device pointer to a row-major matrix [n_rows, index.dim()] + * @param[in] new_indices a device pointer to a vector of indices [n_rows]. + * If the original index is empty (`orig_index.size() == 0`), you can pass `nullptr` + * here to imply a continuous range `[0...n_rows)`. + * @param n_rows the number of samples + */ +template +inline void extend(const handle_t& handle, + index* index, + const T* new_vectors, + const IdxT* new_indices, + IdxT n_rows) +{ + *index = extend(handle, *index, new_vectors, new_indices, n_rows); } /** @@ -116,6 +139,28 @@ inline auto extend(const handle_t& handle, * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. * + * Note, this function requires a temporary buffer to store intermediate results between cuda kernel + * calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can + * pass a pool memory resource or a large enough pre-allocated memory resource to reduce or + * eliminate entirely allocations happening within `search`: + * @code{.cpp} + * ... 
+ * // Create a pooling memory resource with a pre-defined initial size. + * rmm::mr::pool_memory_resource mr( + * rmm::mr::get_current_device_resource(), 1024 * 1024); + * // use default search parameters + * ivf_flat::search_params search_params; + * // Use the same allocator across multiple searches to reduce the number of + * // cuda memory allocations + * ivf_flat::search(handle, search_params, index, queries1, N1, K, out_inds1, out_dists1, &mr); + * ivf_flat::search(handle, search_params, index, queries2, N2, K, out_inds2, out_dists2, &mr); + * ivf_flat::search(handle, search_params, index, queries3, N3, K, out_inds3, out_dists3, &mr); + * ... + * @endcode + * The exact size of the temporary buffer depends on multiple factors and is an implementation + * detail. However, you can safely specify a small initial size for the memory pool, so that only a + * few allocations happen to grow it during the first invocations of the `search`. + * * @tparam T data element type * @tparam IdxT type of the indices * @@ -143,7 +188,7 @@ inline void search(const handle_t& handle, rmm::mr::device_memory_resource* mr = nullptr) { return raft::spatial::knn::ivf_flat::detail::search( - handle, params, index, queries, n_queries, k, neighbors, distances, handle.get_stream(), mr); + handle, params, index, queries, n_queries, k, neighbors, distances, mr); } } // namespace raft::spatial::knn::ivf_flat diff --git a/cpp/include/raft/spatial/knn/ivf_flat_types.hpp b/cpp/include/raft/spatial/knn/ivf_flat_types.hpp index 6c46a288c1..02c4e30c1f 100644 --- a/cpp/include/raft/spatial/knn/ivf_flat_types.hpp +++ b/cpp/include/raft/spatial/knn/ivf_flat_types.hpp @@ -19,11 +19,10 @@ #include "common.hpp" #include +#include #include #include -#include - #include namespace raft::spatial::knn::ivf_flat { @@ -31,22 +30,26 @@ namespace raft::spatial::knn::ivf_flat { /** Size of the interleaved group (see `index::data` description). 
*/ constexpr static uint32_t kIndexGroupSize = 32; +struct index_params : knn::index_params { + /** The number of inverted lists (clusters) */ + uint32_t n_lists = 1024; + /** The number of iterations searching for kmeans centers (index building). */ + uint32_t kmeans_n_iters = 20; + /** The fraction of data to use during iterative kmeans building. */ + double kmeans_trainset_fraction = 0.5; +}; + +struct search_params : knn::search_params { + /** The number of clusters to search. */ + uint32_t n_probes = 20; +}; + +static_assert(std::is_aggregate_v); +static_assert(std::is_aggregate_v); + /** * @brief IVF-flat index. * - * This structure is supposed to be immutable: it's only constructed using `ivf_flat::build`, - * and should never be modified. - * At the same time, we expose all its members and allow the aggregate construction, so that - * third-party users can implement custom serialization/deserialization routines or modify - * the index building process. - * - * It would seem logical to make all the type's members constant. However, we can't do that - * because it would imply copying data when the index is moved. And we also cannot return - * `const index` in our factory functions, such as `ivf_flat::build`, because then the result - * couldn't be moved. - * Therefore, we return `index` mutable as-is, with a warning to the users that there are no - * protection mechanisms against manipulating the data. - * * @tparam T data element type * @tparam IdxT type of the indices in the source dataset * @@ -55,36 +58,34 @@ template struct index : knn::index { static_assert(!raft::is_narrowing_v, "IdxT must be able to represent all values of uint32_t"); + + public: /** * Vectorized load/store size in elements, determines the size of interleaved data chunks. 
* * TODO: in theory, we can lift this to the template parameter and keep it at hardware maximum * possible value by padding the `dim` of the data https://github.com/rapidsai/raft/issues/711 */ - const uint32_t veclen; + [[nodiscard]] constexpr inline auto veclen() const noexcept -> uint32_t { return veclen_; } /** Distance metric used for clustering. */ - const raft::distance::DistanceType metric; - /** Total length of the index. */ - const IdxT size; - /** Dimensionality of the data. */ - const uint32_t dim; - /** Number of clusters/inverted lists. */ - const uint32_t n_lists; - + [[nodiscard]] constexpr inline auto metric() const noexcept -> raft::distance::DistanceType + { + return metric_; + } /** * Inverted list data [size, dim]. * * The data consists of the dataset rows, grouped by their labels (into clusters/lists). - * Within each list (cluster), the data is grouped into blocks of `kGroupSize` interleaved + * Within each list (cluster), the data is grouped into blocks of `kIndexGroupSize` interleaved * vectors. Note, the total index length is slightly larger than the source dataset length, - * because each cluster is padded by `kGroupSize` elements. + * because each cluster is padded by `kIndexGroupSize` elements. * * Interleaving pattern: - * within groups of `kGroupSize` rows, the data is interleaved with the block size equal to + * within groups of `kIndexGroupSize` rows, the data is interleaved with the block size equal to * `veclen * sizeof(T)`. That is, a chunk of `veclen` consecutive components of one row is * followed by a chunk of the same size of the next row, and so on. * - * __Example__: veclen = 2, dim = 6, kGroupSize = 32, list_size = 31 + * __Example__: veclen = 2, dim = 6, kIndexGroupSize = 32, list_size = 31 * * x[ 0, 0], x[ 0, 1], x[ 1, 0], x[ 1, 1], ... x[14, 0], x[14, 1], x[15, 0], x[15, 1], * x[16, 0], x[16, 1], x[17, 0], x[17, 1], ... 
x[30, 0], x[30, 1], - , - , @@ -94,20 +95,102 @@ struct index : knn::index { * x[16, 4], x[16, 5], x[17, 4], x[17, 5], ... x[30, 4], x[30, 5], - , - , * */ - rmm::device_uvector data; + inline auto data() noexcept -> device_mdspan, row_major> + { + return data_.view(); + } + [[nodiscard]] inline auto data() const noexcept + -> device_mdspan, row_major> + { + return data_.view(); + } + /** Inverted list indices: ids of items in the source data [size] */ - rmm::device_uvector indices; + inline auto indices() noexcept -> device_mdspan, row_major> + { + return indices_.view(); + } + [[nodiscard]] inline auto indices() const noexcept + -> device_mdspan, row_major> + { + return indices_.view(); + } + /** Sizes of the lists (clusters) [n_lists] */ - rmm::device_uvector list_sizes; + inline auto list_sizes() noexcept -> device_mdspan, row_major> + { + return list_sizes_.view(); + } + [[nodiscard]] inline auto list_sizes() const noexcept + -> device_mdspan, row_major> + { + return list_sizes_.view(); + } + /** * Offsets into the lists [n_lists + 1]. * The last value contains the total length of the index. */ - rmm::device_uvector list_offsets; + inline auto list_offsets() noexcept -> device_mdspan, row_major> + { + return list_offsets_.view(); + } + [[nodiscard]] inline auto list_offsets() const noexcept + -> device_mdspan, row_major> + { + return list_offsets_.view(); + } + /** k-means cluster centers corresponding to the lists [n_lists, dim] */ - rmm::device_uvector centers; - /** (Optional) Precomputed norms of the `centers` w.r.t. the chosen distance metric [n_lists] */ - std::optional> center_norms; + inline auto centers() noexcept -> device_mdspan, row_major> + { + return centers_.view(); + } + [[nodiscard]] inline auto centers() const noexcept + -> device_mdspan, row_major> + { + return centers_.view(); + } + + /** + * (Optional) Precomputed norms of the `centers` w.r.t. the chosen distance metric [n_lists]. 
+ * + * NB: this may be empty if the index is empty or if the metric does not require the center norms + * calculation. + */ + inline auto center_norms() noexcept + -> std::optional, row_major>> + { + if (center_norms_.has_value()) { + return std::make_optional, row_major>>( + center_norms_->view()); + } else { + return std::nullopt; + } + } + [[nodiscard]] inline auto center_norms() const noexcept + -> std::optional, row_major>> + { + if (center_norms_.has_value()) { + return std::make_optional, row_major>>( + center_norms_->view()); + } else { + return std::nullopt; + } + } + + /** Total length of the index. */ + [[nodiscard]] constexpr inline auto size() const noexcept -> IdxT { return indices_.extent(0); } + /** Dimensionality of the data. */ + [[nodiscard]] constexpr inline auto dim() const noexcept -> uint32_t + { + return centers_.extent(1); + } + /** Number of clusters/inverted lists. */ + [[nodiscard]] constexpr inline auto n_lists() const noexcept -> uint32_t + { + return centers_.extent(0); + } // Don't allow copying the index for performance reasons (try avoiding copying data) index(const index&) = delete; @@ -116,34 +199,81 @@ struct index : knn::index { auto operator=(index&&) -> index& = default; ~index() = default; + /** Construct an empty index. It needs to be trained and then populated. */ + index(const handle_t& handle, raft::distance::DistanceType metric, uint32_t n_lists, uint32_t dim) + : knn::index(), + veclen_(calculate_veclen(dim)), + metric_(metric), + data_(make_device_mdarray(handle, make_extents(0, dim))), + indices_(make_device_mdarray(handle, make_extents(0))), + list_sizes_(make_device_mdarray(handle, make_extents(n_lists))), + list_offsets_(make_device_mdarray(handle, make_extents(n_lists + 1))), + centers_(make_device_mdarray(handle, make_extents(n_lists, dim))), + center_norms_(std::nullopt) + { + check_consistency(); + } + + /** Construct an empty index. It needs to be trained and then populated. 
*/ + index(const handle_t& handle, const index_params& params, uint32_t dim) + : index(handle, params.metric, params.n_lists, dim) + { + } + + /** + * Replace the content of the index with new uninitialized mdarrays to hold the indicated amount + * of data. + */ + void allocate(const handle_t& handle, IdxT index_size, bool allocate_center_norms) + { + data_ = make_device_mdarray(handle, make_extents(index_size, dim())); + indices_ = make_device_mdarray(handle, make_extents(index_size)); + center_norms_ = + allocate_center_norms + ? std::optional(make_device_mdarray(handle, make_extents(n_lists()))) + : std::nullopt; + check_consistency(); + } + + private: + /** + * TODO: in theory, we can lift this to the template parameter and keep it at hardware maximum + * possible value by padding the `dim` of the data https://github.com/rapidsai/raft/issues/711 + */ + uint32_t veclen_; + raft::distance::DistanceType metric_; + device_mdarray, row_major> data_; + device_mdarray, row_major> indices_; + device_mdarray, row_major> list_sizes_; + device_mdarray, row_major> list_offsets_; + device_mdarray, row_major> centers_; + std::optional, row_major>> center_norms_; + /** Throw an error if the index content is inconsistent. 
*/ - inline void check_consistency() const + void check_consistency() { - RAFT_EXPECTS(dim % veclen == 0, "dimensionality is not a multiple of the veclen"); - RAFT_EXPECTS(list_offsets.size() == list_sizes.size() + 1, - "inconsistent number of lists (clusters)"); - RAFT_EXPECTS(reinterpret_cast(data.data()) % (veclen * sizeof(T)) == 0, + RAFT_EXPECTS(dim() % veclen_ == 0, "dimensionality is not a multiple of the veclen"); + RAFT_EXPECTS(data_.extent(0) == indices_.extent(0), "inconsistent index size"); + RAFT_EXPECTS(data_.extent(1) == IdxT(centers_.extent(1)), "inconsistent data dimensionality"); + RAFT_EXPECTS( // + (centers_.extent(0) == list_sizes_.extent(0)) && // + (centers_.extent(0) + 1 == list_offsets_.extent(0)) && // + (!center_norms_.has_value() || centers_.extent(0) == center_norms_->extent(0)), + "inconsistent number of lists (clusters)"); + RAFT_EXPECTS(reinterpret_cast(data_.data_handle()) % (veclen_ * sizeof(T)) == 0, "The data storage pointer is not aligned to the vector length"); } -}; -struct index_params : knn::index_params { - /** The number of inverted lists (clusters) */ - uint32_t n_lists = 1024; - /** The number of iterations searching for kmeans centers (index building). */ - uint32_t kmeans_n_iters = 20; - /** The fraction of data to use during iterative kmeans building. */ - double kmeans_trainset_fraction = 0.5; -}; - -struct search_params : knn::search_params { - /** The number of clusters to search. 
*/ - uint32_t n_probes = 20; + static auto calculate_veclen(uint32_t dim) -> uint32_t + { + // TODO: consider padding the dimensions and fixing veclen to its maximum possible value as a + // template parameter (https://github.com/rapidsai/raft/issues/711) + uint32_t veclen = 16 / sizeof(T); + while (dim % veclen != 0) { + veclen = veclen >> 1; + } + return veclen; + } }; -static_assert(std::is_standard_layout_v>); -static_assert(std::is_aggregate_v>); -static_assert(std::is_aggregate_v); -static_assert(std::is_aggregate_v); - } // namespace raft::spatial::knn::ivf_flat diff --git a/cpp/test/spatial/ann_ivf_flat.cu b/cpp/test/spatial/ann_ivf_flat.cu index a2398e96fc..73aeffada6 100644 --- a/cpp/test/spatial/ann_ivf_flat.cu +++ b/cpp/test/spatial/ann_ivf_flat.cu @@ -217,15 +217,15 @@ class AnnIVFFlatTest : public ::testing::TestWithParam { auto index_2 = ivf_flat::extend(handle_, index, database.data(), nullptr, half_of_data); - auto index_3 = ivf_flat::extend(handle_, - index_2, - database.data() + half_of_data * ps.dim, - vector_indices.data() + half_of_data, - int64_t(ps.num_db_vecs) - half_of_data); + ivf_flat::extend(handle_, + &index_2, + database.data() + half_of_data * ps.dim, + vector_indices.data() + half_of_data, + int64_t(ps.num_db_vecs) - half_of_data); ivf_flat::search(handle_, search_params, - index_3, + index_2, search_queries.data(), ps.num_queries, ps.k, From 57df37de52d9cc41d8d08aba0d4308b7675d159c Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 25 Aug 2022 18:16:08 +0200 Subject: [PATCH 10/58] Add alignment to the TxN_t vectorized type (#792) The recent removal of the union-based type punning in https://github.com/rapidsai/raft/pull/781 caused misaligned access in some cases. This PR returns the alignment to the data type. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) - William Hicks (https://github.com/wphicks) URL: https://github.com/rapidsai/raft/pull/792 --- cpp/include/raft/vectorized.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/vectorized.cuh b/cpp/include/raft/vectorized.cuh index b2e4940dc8..6f22d740ca 100644 --- a/cpp/include/raft/vectorized.cuh +++ b/cpp/include/raft/vectorized.cuh @@ -269,7 +269,7 @@ struct TxN_t { /** defines the number of 'math_t' types stored by this struct */ static const int Ratio = veclen_; - struct { + struct alignas(io_t) { /** the vectorized data that is used for subsequent operations */ math_t data[Ratio]; } val; From c2e7e909cfc65491ae9a2c2dad740036111ea7a7 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 1 Sep 2022 15:21:45 -0400 Subject: [PATCH 11/58] Device, Host, Managed Accessor Types for `mdspan` (#776) Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/776 --- cpp/include/raft/core/mdarray.hpp | 130 +++++++++++++++++++--------- cpp/include/raft/detail/mdarray.hpp | 18 ++-- cpp/test/mdarray.cu | 12 +++ cpp/test/mdspan_utils.cu | 16 +--- 4 files changed, 117 insertions(+), 59 deletions(-) diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp index f1e735c4ab..d251c2b419 100644 --- a/cpp/include/raft/core/mdarray.hpp +++ b/cpp/include/raft/core/mdarray.hpp @@ -122,23 +122,15 @@ template inline constexpr bool is_mdspan_v = is_mdspan_t::value; } // namespace detail -template -struct is_mdspan : std::true_type { -}; -template -struct is_mdspan : detail::is_mdspan_t { -}; -template -struct is_mdspan - : std::conditional_t, is_mdspan, std::false_type> { -}; - /** * @\brief Boolean to determine if variadic template types Tn are either * raft::host_mdspan/raft::device_mdspan or their derived types */ template -inline constexpr bool is_mdspan_v = is_mdspan::value; +inline constexpr bool 
is_mdspan_v = std::conjunction_v...>; + +template +using enable_if_mdspan = std::enable_if_t>; /** * @brief stdex::mdspan with device tag to avoid accessing incorrect memory location. @@ -160,69 +152,83 @@ template >; +template > +using managed_mdspan = + mdspan>; + namespace detail { template struct is_device_mdspan : std::false_type { }; template -struct is_device_mdspan : std::bool_constant { +struct is_device_mdspan : std::bool_constant { }; /** * @\brief Boolean to determine if template type T is either raft::device_mdspan or a derived type */ template -inline constexpr bool is_device_mdspan_v = is_device_mdspan>::value; +using is_device_mdspan_t = is_device_mdspan>; template struct is_host_mdspan : std::false_type { }; template -struct is_host_mdspan : T::accessor_type::is_host_type { +struct is_host_mdspan : std::bool_constant { }; /** * @\brief Boolean to determine if template type T is either raft::host_mdspan or a derived type */ template -inline constexpr bool is_host_mdspan_v = is_host_mdspan>::value; -} // namespace detail +using is_host_mdspan_t = is_host_mdspan>; -template -struct is_device_mdspan : std::true_type { -}; -template -struct is_device_mdspan : detail::is_device_mdspan> { +template +struct is_managed_mdspan : std::false_type { }; -template -struct is_device_mdspan - : std::conditional_t, is_device_mdspan, std::false_type> { +template +struct is_managed_mdspan : std::bool_constant { }; +/** + * @\brief Boolean to determine if template type T is either raft::managed_mdspan or a derived type + */ +template +using is_managed_mdspan_t = is_managed_mdspan>; +} // namespace detail + /** * @\brief Boolean to determine if variadic template types Tn are either raft::device_mdspan or a * derived type */ template -inline constexpr bool is_device_mdspan_v = is_device_mdspan::value; +inline constexpr bool is_device_mdspan_v = std::conjunction_v...>; -template -struct is_host_mdspan : std::true_type { -}; -template -struct is_host_mdspan : 
detail::is_host_mdspan> { -}; -template -struct is_host_mdspan - : std::conditional_t, is_host_mdspan, std::false_type> { -}; +template +using enable_if_device_mdspan = std::enable_if_t>; /** * @\brief Boolean to determine if variadic template types Tn are either raft::host_mdspan or a * derived type */ template -inline constexpr bool is_host_mdspan_v = is_host_mdspan::value; +inline constexpr bool is_host_mdspan_v = std::conjunction_v...>; + +template +using enable_if_host_mdspan = std::enable_if_t>; + +/** + * @\brief Boolean to determine if variadic template types Tn are either raft::managed_mdspan or a + * derived type + */ +template +inline constexpr bool is_managed_mdspan_v = std::conjunction_v...>; + +template +using enable_if_managed_mdspan = std::enable_if_t>; /** * @brief Interface to implement an owning multi-dimensional array @@ -348,7 +354,7 @@ class mdarray typename container_policy_type::const_accessor_policy, typename container_policy_type::accessor_policy>> using view_type_impl = - std::conditional_t, device_mdspan>; @@ -672,6 +678,50 @@ template using device_matrix_view = device_mdspan, LayoutPolicy>; +/** + * @brief Create a raft::mdspan + * @tparam ElementType the data type of the matrix elements + * @tparam IndexType the index type of the extents + * @tparam LayoutPolicy policy for strides and layout ordering + * @tparam is_host_accessible whether the data is accessible on host + * @tparam is_device_accessible whether the data is accessible on device + * @param ptr Pointer to the data + * @param exts dimensionality of the array (series of integers) + * @return raft::mdspan + */ +template +auto make_mdspan(ElementType* ptr, extents exts) +{ + using accessor_type = detail::accessor_mixin, + is_host_accessible, + is_device_accessible>; + + return mdspan{ptr, exts}; +} + +/** + * @brief Create a raft::managed_mdspan + * @tparam ElementType the data type of the matrix elements + * @tparam IndexType the index type of the extents + * @tparam 
LayoutPolicy policy for strides and layout ordering + * @param ptr Pointer to the data + * @param exts dimensionality of the array (series of integers) + * @return raft::managed_mdspan + */ +template +auto make_managed_mdspan(ElementType* ptr, extents exts) +{ + return make_mdspan(ptr, exts); +} + /** * @brief Create a 0-dim (scalar) mdspan instance for host value. * @@ -983,7 +1033,7 @@ auto make_device_vector(raft::handle_t const& handle, IndexType n) * @return raft::host_mdspan or raft::device_mdspan with vector_extent * depending on AccessoryPolicy */ -template >* = nullptr> +template > auto flatten(mdspan_type mds) { RAFT_EXPECTS(mds.is_exhaustive(), "Input must be contiguous."); @@ -1024,7 +1074,7 @@ auto flatten(const array_interface_type& mda) template >* = nullptr> + typename = enable_if_mdspan> auto reshape(mdspan_type mds, extents new_shape) { RAFT_EXPECTS(mds.is_exhaustive(), "Input must be contiguous."); diff --git a/cpp/include/raft/detail/mdarray.hpp b/cpp/include/raft/detail/mdarray.hpp index 48094e3ccf..dd813a7c18 100644 --- a/cpp/include/raft/detail/mdarray.hpp +++ b/cpp/include/raft/detail/mdarray.hpp @@ -231,10 +231,15 @@ class host_vector_policy { /** * @brief A mixin to distinguish host and device memory. 
*/ -template +template struct accessor_mixin : public AccessorPolicy { - using accessor_type = AccessorPolicy; - using is_host_type = std::conditional_t; + using accessor_type = AccessorPolicy; + using is_host_type = std::conditional_t; + using is_device_type = std::conditional_t; + using is_managed_type = std::conditional_t; + static constexpr bool is_host_accessible = is_host; + static constexpr bool is_device_accessible = is_device; + static constexpr bool is_managed_accessible = is_device && is_host; // make sure the explicit ctor can fall through using AccessorPolicy::AccessorPolicy; using offset_policy = accessor_mixin; @@ -242,10 +247,13 @@ struct accessor_mixin : public AccessorPolicy { }; template -using host_accessor = accessor_mixin; +using host_accessor = accessor_mixin; template -using device_accessor = accessor_mixin; +using device_accessor = accessor_mixin; + +template +using managed_accessor = accessor_mixin; namespace stdex = std::experimental; diff --git a/cpp/test/mdarray.cu b/cpp/test/mdarray.cu index 5dcddfc033..af7bb7adf3 100644 --- a/cpp/test/mdarray.cu +++ b/cpp/test/mdarray.cu @@ -382,6 +382,18 @@ void test_factory_methods() auto view = make_host_scalar_view(h_scalar.data_handle()); ASSERT_EQ(view(0), 17.0); } + + // managed + { + raft::handle_t handle{}; + auto mda = make_device_vector(handle, 10); + + auto mdv = make_managed_mdspan(mda.data_handle(), raft::vector_extent{10}); + + static_assert(decltype(mdv)::accessor_type::is_managed_accessible, "Not managed mdspan"); + + ASSERT_EQ(mdv.size(), 10); + } } } // anonymous namespace diff --git a/cpp/test/mdspan_utils.cu b/cpp/test/mdspan_utils.cu index b11a45d7e3..0d7d180b8f 100644 --- a/cpp/test/mdspan_utils.cu +++ b/cpp/test/mdspan_utils.cu @@ -62,9 +62,9 @@ void test_template_asserts() // Checking if types are host_mdspan static_assert(!is_host_mdspan_v>, - "device_matrix_view type not a host_mdspan"); + "device_matrix_view type is a host_mdspan"); static_assert(is_host_mdspan_v>, - 
"host_matrix_view type is a host_mdspan"); + "host_matrix_view type is not a host_mdspan"); // checking variadics static_assert(!is_mdspan_v>, "variadics mdspans"); @@ -171,12 +171,6 @@ void test_reshape() three_d_mdarray mda{layout, policy}; auto flat_view = reshape(mda, raft::extents{27}); - // this confirms aliasing works as intended - static_assert(std::is_same_v>, - "types not the same"); ASSERT_EQ(flat_view.extents().rank(), 1); ASSERT_EQ(flat_view.size(), mda.size()); @@ -195,12 +189,6 @@ void test_reshape() four_d_mdarray mda{layout, policy}; auto matrix = reshape(mda, raft::extents{4, 4}); - // this confirms aliasing works as intended - static_assert(std::is_same_v>, - "types not the same"); ASSERT_EQ(matrix.extents().rank(), 2); ASSERT_EQ(matrix.extent(0), 4); From ff133d4de2c552b54f4639eca19e9a0a958f41bb Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Fri, 2 Sep 2022 14:04:55 +0200 Subject: [PATCH 12/58] Complete the deprecation of duplicated hpp headers (#793) Replace all .hpp headers that have a .cuh header in the same directory with the same name by a simple include of the cuh header and a pragma warning of deprecation. This change hopefully prevents future head scratching when changes in a file are seemingly not picked up by the compiler.. Care has been taken to copy the right start year for the copyright line. Copyright lines have been updated to 2022 when necessary. The following template has been used for the .hpp header replacement text: ``` /* * %%COPYRIGHT_LINE%% * * [... snip license .. ] */ /** * This file is deprecated and will be removed in release 22.06. * Please use the cuh version instead. */ /** * DISCLAIMER: this file is deprecated: use %%CUH_FILE%% instead */ #pragma once #pragma message(__FILE__ \ " is deprecated and will be removed in a future release." \ " Please use the cuh version instead.") ``` Authors: - Allard Hendriksen (https://github.com/ahendriksen) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/793 --- BUILD.md | 4 +- cpp/bench/distance/distance_common.cuh | 4 +- cpp/bench/linalg/add.cu | 2 +- cpp/bench/linalg/map_then_reduce.cu | 2 +- cpp/bench/linalg/matrix_vector_op.cu | 2 +- cpp/bench/linalg/reduce.cu | 2 +- cpp/bench/random/make_blobs.cu | 2 +- cpp/bench/random/permute.cu | 2 +- cpp/bench/spatial/fused_l2_nn.cu | 6 +- cpp/bench/spatial/selection.cu | 2 +- cpp/include/raft/distance/fused_l2_nn.hpp | 103 +------ cpp/include/raft/distance/specializations.hpp | 11 +- cpp/include/raft/label/classlabels.hpp | 109 +------ cpp/include/raft/label/merge_labels.hpp | 62 +--- cpp/include/raft/lap/lap.hpp | 285 +----------------- cpp/include/raft/linalg/add.hpp | 76 +---- cpp/include/raft/linalg/axpy.hpp | 43 +-- cpp/include/raft/linalg/binary_op.hpp | 46 +-- .../raft/linalg/cholesky_r1_update.hpp | 126 +------- .../raft/linalg/coalesced_reduction.hpp | 64 +--- cpp/include/raft/linalg/contractions.hpp | 197 +----------- cpp/include/raft/linalg/divide.hpp | 37 +-- cpp/include/raft/linalg/eig.hpp | 106 +------ cpp/include/raft/linalg/eltwise.hpp | 92 +----- cpp/include/raft/linalg/gemm.hpp | 165 +--------- cpp/include/raft/linalg/gemv.hpp | 198 +----------- cpp/include/raft/linalg/init.hpp | 46 +-- cpp/include/raft/linalg/lanczos.hpp | 148 +-------- cpp/include/raft/linalg/lstsq.hpp | 109 +------ cpp/include/raft/linalg/map_then_reduce.hpp | 77 +---- cpp/include/raft/linalg/matrix_vector_op.hpp | 91 +----- .../raft/linalg/mean_squared_error.hpp | 35 +-- cpp/include/raft/linalg/multiply.hpp | 35 +-- cpp/include/raft/linalg/norm.hpp | 80 +---- cpp/include/raft/linalg/power.hpp | 55 +--- cpp/include/raft/linalg/reduce.hpp | 69 +---- .../raft/linalg/reduce_cols_by_key.hpp | 45 +-- .../raft/linalg/reduce_rows_by_key.hpp | 100 +----- cpp/include/raft/linalg/rsvd.hpp | 129 +------- cpp/include/raft/linalg/sqrt.hpp | 36 +-- cpp/include/raft/linalg/strided_reduction.hpp | 64 +--- 
cpp/include/raft/linalg/subtract.hpp | 75 +---- cpp/include/raft/linalg/svd.hpp | 174 +---------- cpp/include/raft/linalg/ternary_op.hpp | 42 +-- cpp/include/raft/linalg/transpose.hpp | 47 +-- cpp/include/raft/linalg/unary_op.hpp | 63 +--- cpp/include/raft/matrix/col_wise_sort.hpp | 44 +-- cpp/include/raft/matrix/matrix.hpp | 263 +--------------- cpp/include/raft/random/make_regression.hpp | 93 +----- .../raft/random/multi_variable_gaussian.hpp | 51 +--- cpp/include/raft/random/permute.hpp | 50 +-- cpp/include/raft/sparse/linalg/add.hpp | 85 +----- cpp/include/raft/sparse/linalg/degree.hpp | 109 +------ cpp/include/raft/sparse/linalg/norm.hpp | 59 +--- cpp/include/raft/sparse/linalg/spectral.hpp | 33 +- cpp/include/raft/sparse/linalg/transpose.hpp | 62 +--- cpp/include/raft/sparse/op/filter.hpp | 80 +---- cpp/include/raft/sparse/op/reduce.hpp | 75 +---- cpp/include/raft/sparse/op/row_op.hpp | 37 +-- cpp/include/raft/sparse/op/slice.hpp | 67 +--- cpp/include/raft/sparse/op/sort.hpp | 64 +--- .../sparse/selection/connect_components.hpp | 68 +---- cpp/include/raft/sparse/selection/knn.hpp | 90 +----- .../raft/sparse/selection/knn_graph.hpp | 51 +--- .../knn/detail/ann_kmeans_balanced.cuh | 2 +- .../raft/spatial/knn/detail/ann_utils.cuh | 2 +- .../raft/spatial/knn/epsilon_neighborhood.hpp | 51 +--- .../raft/spatial/knn/specializations.hpp | 13 +- cpp/include/raft/spectral/eigen_solvers.hpp | 95 +----- cpp/include/raft/stats/accuracy.hpp | 32 +- .../raft/stats/adjusted_rand_index.hpp | 39 +-- cpp/include/raft/stats/contingency_matrix.hpp | 91 +----- cpp/include/raft/stats/cov.hpp | 50 +-- .../raft/stats/detail/weighted_mean.cuh | 4 +- cpp/include/raft/stats/dispersion.hpp | 48 +-- cpp/include/raft/stats/entropy.hpp | 37 +-- cpp/include/raft/stats/histogram.hpp | 54 +--- cpp/include/raft/stats/homogeneity_score.hpp | 41 +-- .../raft/stats/information_criterion.hpp | 54 +--- cpp/include/raft/stats/kl_divergence.hpp | 34 +-- cpp/include/raft/stats/mean.hpp | 43 +-- 
cpp/include/raft/stats/mean_center.hpp | 69 +---- cpp/include/raft/stats/meanvar.hpp | 48 +-- cpp/include/raft/stats/minmax.hpp | 62 +--- cpp/include/raft/stats/mutual_info_score.hpp | 39 +-- cpp/include/raft/stats/r2_score.hpp | 38 +-- cpp/include/raft/stats/rand_index.hpp | 31 +- cpp/include/raft/stats/regression_metrics.hpp | 43 +-- cpp/include/raft/stats/silhouette_score.hpp | 65 +--- cpp/include/raft/stats/specializations.hpp | 12 +- cpp/include/raft/stats/stddev.hpp | 79 +---- cpp/include/raft/stats/sum.hpp | 39 +-- .../raft/stats/trustworthiness_score.hpp | 41 +-- cpp/include/raft/stats/v_measure.hpp | 40 +-- cpp/include/raft/stats/weighted_mean.hpp | 82 +---- cpp/test/spatial/ball_cover.cu | 4 +- 96 files changed, 567 insertions(+), 5464 deletions(-) diff --git a/BUILD.md b/BUILD.md index c4d8b1b356..3c6ad2bf20 100644 --- a/BUILD.md +++ b/BUILD.md @@ -205,8 +205,8 @@ The pre-compiled libraries contain template specializations for commonly used ty The following example tells the compiler to ignore the pre-compiled templates for the `libraft-distance` API so any symbols already compiled into pre-compiled shared library will be used instead: ```c++ -#include -#include +#include +#include ``` ### Building RAFT C++ from source in cmake diff --git a/cpp/bench/distance/distance_common.cuh b/cpp/bench/distance/distance_common.cuh index dae2550326..4f1a8ccab1 100644 --- a/cpp/bench/distance/distance_common.cuh +++ b/cpp/bench/distance/distance_common.cuh @@ -16,9 +16,9 @@ #include #include -#include +#include #if defined RAFT_DISTANCE_COMPILED -#include +#include #endif #include diff --git a/cpp/bench/linalg/add.cu b/cpp/bench/linalg/add.cu index 7c651b61ed..7d00b8cbae 100644 --- a/cpp/bench/linalg/add.cu +++ b/cpp/bench/linalg/add.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include namespace raft::bench::linalg { diff --git a/cpp/bench/linalg/map_then_reduce.cu b/cpp/bench/linalg/map_then_reduce.cu index 7eeb4a79b6..33a3e66264 100644 --- 
a/cpp/bench/linalg/map_then_reduce.cu +++ b/cpp/bench/linalg/map_then_reduce.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include namespace raft::bench::linalg { diff --git a/cpp/bench/linalg/matrix_vector_op.cu b/cpp/bench/linalg/matrix_vector_op.cu index d3a53ea345..aa8f2667ed 100644 --- a/cpp/bench/linalg/matrix_vector_op.cu +++ b/cpp/bench/linalg/matrix_vector_op.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include namespace raft::bench::linalg { diff --git a/cpp/bench/linalg/reduce.cu b/cpp/bench/linalg/reduce.cu index 018086a689..015e0b8abe 100644 --- a/cpp/bench/linalg/reduce.cu +++ b/cpp/bench/linalg/reduce.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include diff --git a/cpp/bench/random/make_blobs.cu b/cpp/bench/random/make_blobs.cu index c449223040..fdd4ef61d2 100644 --- a/cpp/bench/random/make_blobs.cu +++ b/cpp/bench/random/make_blobs.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include diff --git a/cpp/bench/random/permute.cu b/cpp/bench/random/permute.cu index a72eca3f87..5364bb44e3 100644 --- a/cpp/bench/random/permute.cu +++ b/cpp/bench/random/permute.cu @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/bench/spatial/fused_l2_nn.cu b/cpp/bench/spatial/fused_l2_nn.cu index dc3b507fbf..e5b5dc377a 100644 --- a/cpp/bench/spatial/fused_l2_nn.cu +++ b/cpp/bench/spatial/fused_l2_nn.cu @@ -17,13 +17,13 @@ #include #include #include -#include +#include #include -#include +#include #include #if defined RAFT_NN_COMPILED -#include +#include #endif namespace raft::bench::spatial { diff --git a/cpp/bench/spatial/selection.cu b/cpp/bench/spatial/selection.cu index c3a2bc6d3d..1f116c199f 100644 --- a/cpp/bench/spatial/selection.cu +++ b/cpp/bench/spatial/selection.cu @@ -18,7 +18,7 @@ #include #if defined RAFT_NN_COMPILED -#include +#include #endif #include diff --git a/cpp/include/raft/distance/fused_l2_nn.hpp b/cpp/include/raft/distance/fused_l2_nn.hpp index 
768e33b3a7..74ad0974f4 100644 --- a/cpp/include/raft/distance/fused_l2_nn.hpp +++ b/cpp/include/raft/distance/fused_l2_nn.hpp @@ -18,105 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __FUSED_L2_NN_H -#define __FUSED_L2_NN_H - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace raft { -namespace distance { - -template -using KVPMinReduce = detail::KVPMinReduceImpl; - -template -using MinAndDistanceReduceOp = detail::MinAndDistanceReduceOpImpl; - -template -using MinReduceOp = detail::MinReduceOpImpl; - /** - * Initialize array using init value from reduction op + * DISCLAIMER: this file is deprecated: use fused_l2_nn.cuh instead */ -template -void initialize(const raft::handle_t& handle, OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp) -{ - detail::initialize(min, m, maxVal, redOp, handle.get_stream()); -} -/** - * @brief Fused L2 distance and 1-nearest-neighbor computation in a single call. - * - * The benefits of such a call are 2-fold: 1) eliminate the need for an - * intermediate buffer to store the output of gemm 2) reduce the memory read - * traffic on this intermediate buffer, otherwise needed during the reduction - * phase for 1-NN. - * - * @tparam DataT data type - * @tparam OutT output type to either store 1-NN indices and their minimum - * distances or store only the min distances. Accordingly, one - * has to pass an appropriate `ReduceOpT` - * @tparam IdxT indexing arithmetic type - * @tparam ReduceOpT A struct to perform the final needed reduction operation - * and also to initialize the output array elements with the - * appropriate initial value needed for reduction. - * - * @param[out] min will contain the reduced output (Length = `m`) - * (on device) - * @param[in] x first matrix. Row major. Dim = `m x k`. - * (on device). - * @param[in] y second matrix. Row major. Dim = `n x k`. - * (on device). - * @param[in] xn L2 squared norm of `x`. Length = `m`. (on device). 
- * @param[in] yn L2 squared norm of `y`. Length = `n`. (on device) - * @param[in] m gemm m - * @param[in] n gemm n - * @param[in] k gemm k - * @param[in] workspace temp workspace. Size = sizeof(int)*m. (on device) - * @param[in] redOp reduction operator in the epilogue - * @param[in] pairRedOp reduction operation on key value pairs - * @param[in] sqrt Whether the output `minDist` should contain L2-sqrt - * @param[in] initOutBuffer whether to initialize the output buffer before the - * main kernel launch - * @param[in] stream cuda stream - */ -template -void fusedL2NN(OutT* min, - const DataT* x, - const DataT* y, - const DataT* xn, - const DataT* yn, - IdxT m, - IdxT n, - IdxT k, - void* workspace, - ReduceOpT redOp, - KVPReduceOpT pairRedOp, - bool sqrt, - bool initOutBuffer, - cudaStream_t stream) -{ - size_t bytes = sizeof(DataT) * k; - if (16 % sizeof(DataT) == 0 && bytes % 16 == 0) { - detail::fusedL2NNImpl( - min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream); - } else if (8 % sizeof(DataT) == 0 && bytes % 8 == 0) { - detail::fusedL2NNImpl( - min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream); - } else { - detail::fusedL2NNImpl( - min, x, y, xn, yn, m, n, k, (int*)workspace, redOp, pairRedOp, sqrt, initOutBuffer, stream); - } -} +#pragma once -} // namespace distance -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "fused_l2_nn.cuh" diff --git a/cpp/include/raft/distance/specializations.hpp b/cpp/include/raft/distance/specializations.hpp index 641968d9f1..04afb73036 100644 --- a/cpp/include/raft/distance/specializations.hpp +++ b/cpp/include/raft/distance/specializations.hpp @@ -18,11 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __DISTANCE_SPECIALIZATIONS_H -#define __DISTANCE_SPECIALIZATIONS_H +/** + * DISCLAIMER: this file is deprecated: use specializations.cuh instead + */ #pragma once -#include +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "specializations.cuh" diff --git a/cpp/include/raft/label/classlabels.hpp b/cpp/include/raft/label/classlabels.hpp index 189c26f69f..4f47b426c0 100644 --- a/cpp/include/raft/label/classlabels.hpp +++ b/cpp/include/raft/label/classlabels.hpp @@ -13,110 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#ifndef __CLASS_LABELS_H -#define __CLASS_LABELS_H - -#pragma once - -#include - -namespace raft { -namespace label { - /** - * Get unique class labels. - * - * The y array is assumed to store class labels. The unique values are selected - * from this array. - * - * @tparam value_t numeric type of the arrays with class labels - * @param [inout] unique output unique labels - * @param [in] y device array of labels, size [n] - * @param [in] n number of labels - * @param [in] stream cuda stream - * @returns unique device array of unique labels, unallocated on entry, - * on exit it has size + * This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. */ -template -int getUniquelabels(rmm::device_uvector& unique, value_t* y, size_t n, cudaStream_t stream) -{ - return detail::getUniquelabels(unique, y, n, stream); -} /** - * Assign one versus rest labels. - * - * The output labels will have values +/-1: - * y_out = (y == y_unique[idx]) ? +1 : -1; - * - * The output type currently is set to value_t, but for SVM in principle we are - * free to choose other type for y_out (it should represent +/-1, and it is used - * in floating point arithmetics). 
- * - * @param [in] y device array if input labels, size [n] - * @param [in] n number of labels - * @param [in] y_unique device array of unique labels, size [n_classes] - * @param [in] n_classes number of unique labels - * @param [out] y_out device array of output labels - * @param [in] idx index of unique label that should be labeled as 1 - * @param [in] stream cuda stream - */ -template -void getOvrlabels( - value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream) -{ - detail::getOvrlabels(y, n, y_unique, n_classes, y_out, idx, stream); -} -/** - * Maps an input array containing a series of numbers into a new array - * where numbers have been mapped to a monotonically increasing set - * of labels. This can be useful in machine learning algorithms, for instance, - * where a given set of labels is not taken from a monotonically increasing - * set. This can happen if they are filtered or if only a subset of the - * total labels are used in a dataset. This is also useful in graph algorithms - * where a set of vertices need to be labeled in a monotonically increasing - * order. - * @tparam Type the numeric type of the input and output arrays - * @tparam Lambda the type of an optional filter function, which determines - * which items in the array to map. - * @param[out] out the output monotonic array - * @param[in] in input label array - * @param[in] N number of elements in the input array - * @param[in] stream cuda stream to use - * @param[in] filter_op an optional function for specifying which values - * should have monotonically increasing labels applied to them. - * @param[in] zero_based force monotonic set to start at 0? 
+ * DISCLAIMER: this file is deprecated: use classlabels.cuh instead */ -template -void make_monotonic( - Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false) -{ - detail::make_monotonic(out, in, N, stream, filter_op, zero_based); -} -/** - * Maps an input array containing a series of numbers into a new array - * where numbers have been mapped to a monotonically increasing set - * of labels. This can be useful in machine learning algorithms, for instance, - * where a given set of labels is not taken from a monotonically increasing - * set. This can happen if they are filtered or if only a subset of the - * total labels are used in a dataset. This is also useful in graph algorithms - * where a set of vertices need to be labeled in a monotonically increasing - * order. - * @tparam Type the numeric type of the input and output arrays - * @param[out] out output label array with labels assigned monotonically - * @param[in] in input label array - * @param[in] N number of elements in the input array - * @param[in] stream cuda stream to use - * @param[in] zero_based force monotonic label set to start at 0? - */ -template -void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false) -{ - detail::make_monotonic(out, in, N, stream, zero_based); -} -}; // namespace label -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "classlabels.cuh" diff --git a/cpp/include/raft/label/merge_labels.hpp b/cpp/include/raft/label/merge_labels.hpp index 2bf2fa830b..7c0c25d038 100644 --- a/cpp/include/raft/label/merge_labels.hpp +++ b/cpp/include/raft/label/merge_labels.hpp @@ -13,59 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - -#ifndef __MERGE_LABELS_H -#define __MERGE_LABELS_H - -#pragma once - -#include - -namespace raft { -namespace label { +/** + * This file is deprecated and will be removed in release 22.06. + * Please use the cuh version instead. + */ /** - * @brief Merge two labellings in-place, according to a core mask - * - * A labelling is a representation of disjoint sets (groups) where points that - * belong to the same group have the same label. It is assumed that group - * labels take values between 1 and N. labels relate to points, i.e a label i+1 - * means that you belong to the same group as the point i. - * The special value MAX_LABEL is used to mark points that are not labelled. - * - * The two label arrays A and B induce two sets of groups over points 0..N-1. - * If a point is labelled i in A and j in B and the mask is true for this - * point, then i and j are equivalent labels and their groups are merged by - * relabeling the elements of both groups to have the same label. The new label - * is the smaller one from the original labels. - * It is required that if the mask is true for a point, this point is labelled - * (i.e its label is different than the special value MAX_LABEL). - * - * One use case is finding connected components: the two input label arrays can - * represent the connected components of graphs G_A and G_B, and the output - * would be the connected components labels of G_A \union G_B. 
- * - * @param[inout] labels_a First input, and output label array (in-place) - * @param[in] labels_b Second input label array - * @param[in] mask Core point mask - * @param[out] R label equivalence map - * @param[in] m Working flag - * @param[in] N Number of points in the dataset - * @param[in] stream CUDA stream + * DISCLAIMER: this file is deprecated: use merge_labels.cuh instead */ -template -void merge_labels(value_idx* labels_a, - const value_idx* labels_b, - const bool* mask, - value_idx* R, - bool* m, - value_idx N, - cudaStream_t stream) -{ - detail::merge_labels(labels_a, labels_b, mask, R, m, N, stream); -} -}; // namespace label -}; // namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "merge_labels.cuh" diff --git a/cpp/include/raft/lap/lap.hpp b/cpp/include/raft/lap/lap.hpp index a9f205932c..badafb8afd 100644 --- a/cpp/include/raft/lap/lap.hpp +++ b/cpp/include/raft/lap/lap.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * Copyright 2020 KETAN DATE & RAKESH NAGI * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,289 +11,21 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License.+ - * - * CUDA Implementation of O(n^3) alternating tree Hungarian Algorithm - * Authors: Ketan Date and Rakesh Nagi - * - * Article reference: - * Date, Ketan, and Rakesh Nagi. "GPU-accelerated Hungarian algorithms - * for the Linear Assignment Problem." Parallel Computing 57 (2016): 52-72. - * + * limitations under the License. */ - /** * This file is deprecated and will be removed in release 22.06. 
* Please use the cuh version instead. */ -#ifndef __LAP_H -#define __LAP_H +/** + * DISCLAIMER: this file is deprecated: use lap.cuh instead + */ #pragma once -#include -#include - -#include -#include - -#include "detail/d_structs.h" -#include "detail/lap_functions.cuh" - -namespace raft { -namespace lap { - -template -class LinearAssignmentProblem { - vertex_t size_; - vertex_t batchsize_; - weight_t epsilon_; - - weight_t const* d_costs_; - - Vertices d_vertices_dev; - VertexData d_row_data_dev, d_col_data_dev; - - raft::handle_t const& handle_; - rmm::device_uvector row_covers_v; - rmm::device_uvector col_covers_v; - rmm::device_uvector row_duals_v; - rmm::device_uvector col_duals_v; - rmm::device_uvector col_slacks_v; - rmm::device_uvector row_is_visited_v; - rmm::device_uvector col_is_visited_v; - rmm::device_uvector row_parents_v; - rmm::device_uvector col_parents_v; - rmm::device_uvector row_children_v; - rmm::device_uvector col_children_v; - rmm::device_uvector obj_val_primal_v; - rmm::device_uvector obj_val_dual_v; - - public: - LinearAssignmentProblem(raft::handle_t const& handle, - vertex_t size, - vertex_t batchsize, - weight_t epsilon) - : handle_(handle), - size_(size), - batchsize_(batchsize), - epsilon_(epsilon), - d_costs_(nullptr), - row_covers_v(0, handle_.get_stream()), - col_covers_v(0, handle_.get_stream()), - row_duals_v(0, handle_.get_stream()), - col_duals_v(0, handle_.get_stream()), - col_slacks_v(0, handle_.get_stream()), - row_is_visited_v(0, handle_.get_stream()), - col_is_visited_v(0, handle_.get_stream()), - row_parents_v(0, handle_.get_stream()), - col_parents_v(0, handle_.get_stream()), - row_children_v(0, handle_.get_stream()), - col_children_v(0, handle_.get_stream()), - obj_val_primal_v(0, handle_.get_stream()), - obj_val_dual_v(0, handle_.get_stream()) - { - } - - // Executes Hungarian algorithm on the input cost matrix. 
- void solve(weight_t const* d_cost_matrix, vertex_t* d_row_assignment, vertex_t* d_col_assignment) - { - initializeDevice(); - - d_vertices_dev.row_assignments = d_row_assignment; - d_vertices_dev.col_assignments = d_col_assignment; - - d_costs_ = d_cost_matrix; - - int step = 0; - - while (step != 100) { - switch (step) { - case 0: step = hungarianStep0(); break; - case 1: step = hungarianStep1(); break; - case 2: step = hungarianStep2(); break; - case 3: step = hungarianStep3(); break; - case 4: step = hungarianStep4(); break; - case 5: step = hungarianStep5(); break; - case 6: step = hungarianStep6(); break; - } - } - - d_costs_ = nullptr; - } - - // Function for getting optimal row dual vector for subproblem spId. - std::pair getRowDualVector(int spId) const - { - return std::make_pair(row_duals_v.data() + spId * size_, size_); - } - - // Function for getting optimal col dual vector for subproblem spId. - std::pair getColDualVector(int spId) - { - return std::make_pair(col_duals_v.data() + spId * size_, size_); - } - - // Function for getting optimal primal objective value for subproblem spId. - weight_t getPrimalObjectiveValue(int spId) - { - weight_t result; - raft::update_host(&result, obj_val_primal_v.data() + spId, 1, handle_.get_stream()); - CHECK_CUDA(handle_.get_stream()); - return result; - } - - // Function for getting optimal dual objective value for subproblem spId. - weight_t getDualObjectiveValue(int spId) - { - weight_t result; - raft::update_host(&result, obj_val_dual_v.data() + spId, 1, handle_.get_stream()); - CHECK_CUDA(handle_.get_stream()); - return result; - } - - private: - // Helper function for initializing global variables and arrays on a single host. 
- void initializeDevice() - { - cudaStream_t stream = handle_.get_stream(); - row_covers_v.resize(batchsize_ * size_, stream); - col_covers_v.resize(batchsize_ * size_, stream); - row_duals_v.resize(batchsize_ * size_, stream); - col_duals_v.resize(batchsize_ * size_, stream); - col_slacks_v.resize(batchsize_ * size_, stream); - row_is_visited_v.resize(batchsize_ * size_, stream); - col_is_visited_v.resize(batchsize_ * size_, stream); - row_parents_v.resize(batchsize_ * size_, stream); - col_parents_v.resize(batchsize_ * size_, stream); - row_children_v.resize(batchsize_ * size_, stream); - col_children_v.resize(batchsize_ * size_, stream); - obj_val_primal_v.resize(batchsize_, stream); - obj_val_dual_v.resize(batchsize_, stream); - - d_vertices_dev.row_covers = row_covers_v.data(); - d_vertices_dev.col_covers = col_covers_v.data(); - - d_vertices_dev.row_duals = row_duals_v.data(); - d_vertices_dev.col_duals = col_duals_v.data(); - d_vertices_dev.col_slacks = col_slacks_v.data(); - - d_row_data_dev.is_visited = row_is_visited_v.data(); - d_col_data_dev.is_visited = col_is_visited_v.data(); - d_row_data_dev.parents = row_parents_v.data(); - d_row_data_dev.children = row_children_v.data(); - d_col_data_dev.parents = col_parents_v.data(); - d_col_data_dev.children = col_children_v.data(); - - thrust::fill(thrust::device, row_covers_v.begin(), row_covers_v.end(), int{0}); - thrust::fill(thrust::device, col_covers_v.begin(), col_covers_v.end(), int{0}); - thrust::fill(thrust::device, row_duals_v.begin(), row_duals_v.end(), weight_t{0}); - thrust::fill(thrust::device, col_duals_v.begin(), col_duals_v.end(), weight_t{0}); - } - - // Function for calculating initial zeros by subtracting row and column minima from each element. - int hungarianStep0() - { - detail::initialReduction(handle_, d_costs_, d_vertices_dev, batchsize_, size_); - - return 1; - } - - // Function for calculating initial zeros by subtracting row and column minima from each element. 
- int hungarianStep1() - { - detail::computeInitialAssignments( - handle_, d_costs_, d_vertices_dev, batchsize_, size_, epsilon_); - - int next = 2; - - while (true) { - if ((next = hungarianStep2()) == 6) break; - - if ((next = hungarianStep3()) == 5) break; - - hungarianStep4(); - } - - return next; - } - - // Function for checking optimality and constructing predicates and covers. - int hungarianStep2() - { - int cover_count = detail::computeRowCovers( - handle_, d_vertices_dev, d_row_data_dev, d_col_data_dev, batchsize_, size_); - - int next = (cover_count == batchsize_ * size_) ? 6 : 3; - - return next; - } - - // Function for building alternating tree rooted at unassigned rows. - int hungarianStep3() - { - int next; - - rmm::device_scalar flag_v(handle_.get_stream()); - - bool h_flag = false; - flag_v.set_value_async(h_flag, handle_.get_stream()); - - detail::executeZeroCover(handle_, - d_costs_, - d_vertices_dev, - d_row_data_dev, - d_col_data_dev, - flag_v.data(), - batchsize_, - size_, - epsilon_); - - h_flag = flag_v.value(handle_.get_stream()); - - next = h_flag ? 4 : 5; - - return next; - } - - // Function for augmenting the solution along multiple node-disjoint alternating trees. - int hungarianStep4() - { - detail::reversePass(handle_, d_row_data_dev, d_col_data_dev, batchsize_, size_); - - detail::augmentationPass( - handle_, d_vertices_dev, d_row_data_dev, d_col_data_dev, batchsize_, size_); - - return 2; - } - - // Function for updating dual solution to introduce new zero-cost arcs. - int hungarianStep5() - { - detail::dualUpdate( - handle_, d_vertices_dev, d_row_data_dev, d_col_data_dev, batchsize_, size_, epsilon_); - - return 3; - } - - // Function for calculating primal and dual objective values at optimality. 
- int hungarianStep6() - { - detail::calcObjValPrimal(handle_, - obj_val_primal_v.data(), - d_costs_, - d_vertices_dev.row_assignments, - batchsize_, - size_); - - detail::calcObjValDual(handle_, obj_val_dual_v.data(), d_vertices_dev, batchsize_, size_); - - return 100; - } -}; - -} // namespace lap -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "lap.cuh" diff --git a/cpp/include/raft/linalg/add.hpp b/cpp/include/raft/linalg/add.hpp index a80398fcad..e7f9610892 100644 --- a/cpp/include/raft/linalg/add.hpp +++ b/cpp/include/raft/linalg/add.hpp @@ -18,78 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __ADD_H -#define __ADD_H - -#pragma once - -#include "detail/add.cuh" - -namespace raft { -namespace linalg { - -using detail::adds_scalar; - -/** - * @brief Elementwise scalar add operation on the input buffer - * - * @tparam InT input data-type. Also the data-type upon which the math ops - * will be performed - * @tparam OutT output data-type - * @tparam IdxType Integer type used to for addressing - * - * @param out the output buffer - * @param in the input buffer - * @param scalar the scalar used in the operations - * @param len number of elements in the input buffer - * @param stream cuda stream where to launch work - */ -template -void addScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream) -{ - detail::addScalar(out, in, scalar, len, stream); -} - /** - * @brief Elementwise add operation on the input buffers - * @tparam InT input data-type. 
Also the data-type upon which the math ops - * will be performed - * @tparam OutT output data-type - * @tparam IdxType Integer type used to for addressing - * - * @param out the output buffer - * @param in1 the first input buffer - * @param in2 the second input buffer - * @param len number of elements in the input buffers - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use add.cuh instead */ -template -void add(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t stream) -{ - detail::add(out, in1, in2, len, stream); -} -/** Substract single value pointed by singleScalarDev parameter in device memory from inDev[i] and - * write result to outDev[i] - * @tparam math_t data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param outDev the output buffer - * @param inDev the input buffer - * @param singleScalarDev pointer to the scalar located in device memory - * @param len number of elements in the input and output buffer - * @param stream cuda stream - */ -template -void addDevScalar(math_t* outDev, - const math_t* inDev, - const math_t* singleScalarDev, - IdxType len, - cudaStream_t stream) -{ - detail::addDevScalar(outDev, inDev, singleScalarDev, len, stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "add.cuh" diff --git a/cpp/include/raft/linalg/axpy.hpp b/cpp/include/raft/linalg/axpy.hpp index c227ba66c8..8db4c5a6e8 100644 --- a/cpp/include/raft/linalg/axpy.hpp +++ b/cpp/include/raft/linalg/axpy.hpp @@ -18,43 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __AXPY_H -#define __AXPY_H - -#pragma once - -#include "detail/axpy.cuh" - -namespace raft::linalg { - /** - * @brief the wrapper of cublas axpy function - * It computes the following equation: y = alpha * x + y - * - * @tparam T the element type - * @tparam DevicePointerMode whether pointers alpha, beta point to device memory - * @param [in] handle raft handle - * @param [in] n number of elements in x and y - * @param [in] alpha host or device scalar - * @param [in] x vector of length n - * @param [in] incx stride between consecutive elements of x - * @param [inout] y vector of length n - * @param [in] incy stride between consecutive elements of y - * @param [in] stream + * DISCLAIMER: this file is deprecated: use axpy.cuh instead */ -template -void axpy(const raft::handle_t& handle, - const int n, - const T* alpha, - const T* x, - const int incx, - T* y, - const int incy, - cudaStream_t stream) -{ - detail::axpy(handle, n, alpha, x, incx, y, incy, stream); -} -} // namespace raft::linalg +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "axpy.cuh" diff --git a/cpp/include/raft/linalg/binary_op.hpp b/cpp/include/raft/linalg/binary_op.hpp index 9983e8ab50..f0a54cb164 100644 --- a/cpp/include/raft/linalg/binary_op.hpp +++ b/cpp/include/raft/linalg/binary_op.hpp @@ -18,46 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __BINARY_OP_H -#define __BINARY_OP_H - -#pragma once - -#include "detail/binary_op.cuh" - -#include - -namespace raft { -namespace linalg { - /** - * @brief perform element-wise binary operation on the input arrays - * @tparam InType input data-type - * @tparam Lambda the device-lambda performing the actual operation - * @tparam OutType output data-type - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads-per-block in the final kernel launched - * @param out the output array - * @param in1 the first input array - * @param in2 the second input array - * @param len number of elements in the input array - * @param op the device-lambda - * @param stream cuda stream where to launch work - * @note Lambda must be a functor with the following signature: - * `OutType func(const InType& val1, const InType& val2);` + * DISCLAIMER: this file is deprecated: use binary_op.cuh instead */ -template -void binaryOp( - OutType* out, const InType* in1, const InType* in2, IdxType len, Lambda op, cudaStream_t stream) -{ - detail::binaryOp(out, in1, in2, len, op, stream); -} -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "binary_op.cuh" diff --git a/cpp/include/raft/linalg/cholesky_r1_update.hpp b/cpp/include/raft/linalg/cholesky_r1_update.hpp index 1158ad3aa4..a1967c36cb 100644 --- a/cpp/include/raft/linalg/cholesky_r1_update.hpp +++ b/cpp/include/raft/linalg/cholesky_r1_update.hpp @@ -18,126 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __CHOLESKY_R1_UPDATE_H -#define __CHOLESKY_R1_UPDATE_H +/** + * DISCLAIMER: this file is deprecated: use cholesky_r1_update.cuh instead + */ #pragma once -#include "detail/cholesky_r1_update.cuh" - -namespace raft { -namespace linalg { - -/** - * @brief Rank 1 update of Cholesky decomposition. 
- * - * This method is useful if an algorithm iteratively builds up matrix A, and - * the Cholesky decomposition of A is required at each step. - * - * On entry, L is the Cholesky decomposition of matrix A, where both A and L - * have size n-1 x n-1. We are interested in the Cholesky decomposition of a new - * matrix A', which we get by adding a row and column to A. In Python notation: - * - A'[0:n-1, 0:n-1] = A; - * - A'[:,n-1] = A[n-1,:] = A_new - * - * On entry, the new column A_new, is stored as the n-th column of L if uplo == - * CUBLAS_FILL_MODE_UPPER, else A_new is stored as the n-th row of L. - * - * On exit L contains the Cholesky decomposition of A'. In practice the elements - * of A_new are overwritten with new row/column of the L matrix. - * - * The uplo paramater is used to select the matrix layout. - * If (uplo != CUBLAS_FILL_MODE_UPPER) then the input arg L stores the - * lower triangular matrix L, so that A = L * L.T. Otherwise the input arg L - * stores an upper triangular matrix U: A = U.T * U. - * - * On exit L will be updated to store the Cholesky decomposition of A'. - * - * If the matrix is not positive definit, or very ill conditioned then the new - * diagonal element of L would be NaN. In such a case an exception is thrown. - * The eps argument can be used to override this behavior: if eps >= 0 then - * the diagonal element is replaced by eps in case the diagonal is NaN or - * smaller than eps. Note: for an iterative solver it is probably better to - * stop early in case of error, rather than relying on the eps parameter. 
- * - * Examples: - * - * - Lower triangular factorization: - * @code{.cpp} - * // Initialize arrays - * int ld_L = n_rows; - * rmm::device_uvector L(ld_L * n_rows, stream); - * raft::linalg::choleskyRank1Update(handle, L, n_rows, ld_L, nullptr, - * &n_bytes, CUBLAS_FILL_MODE_LOWER, - * stream); - * rmm::device_uvector workspace(n_bytes, stream); - * - * for (n=1; n<=n_rows; rank++) { - * // Calculate a new row/column of matrix A into A_new - * // ... - * // Copy new row to L[rank-1,:] - * RAFT_CUBLAS_TRY(cublasCopy(handle.get_cublas_handle(), n - 1, A_new, 1, - * L + n - 1, ld_L, stream)); - * // Update Cholesky factorization - * raft::linalg::choleskyRank1Update( - * handle, L, rank, ld_L, workspace, &n_bytes, CUBLAS_FILL_MODE_LOWER, - * stream); - * } - * Now L stores the Cholesky decomposition of A: A = L * L.T - * @endcode - * - * - Upper triangular factorization: - * @code{.cpp} - * // Initialize arrays - * int ld_U = n_rows; - * rmm::device_uvector U(ld_U * n_rows, stream); - * raft::linalg::choleskyRank1Update(handle, L, n_rows, ld_U, nullptr, - * &n_bytes, CUBLAS_FILL_MODE_UPPER, - * stream); - * rmm::device_uvector workspace(stream, n_bytes, stream); - * - * for (n=1; n<=n_rows; n++) { - * // Calculate a new row/column of matrix A into array A_new - * // ... - * // Copy new row to U[:,n-1] (column major layout) - * raft::copy(U + ld_U * (n-1), A_new, n-1, stream); - * // - * // Update Cholesky factorization - * raft::linalg::choleskyRank1Update( - * handle, U, n, ld_U, workspace, &n_bytes, CUBLAS_FILL_MODE_UPPER, - * stream); - * } - * // Now U stores the Cholesky decomposition of A: A = U.T * U - * @endcode - * - * @param handle RAFT handle (used to retrive cuBLAS handles). - * @param L device array for to store the triangular matrix L, and the new - * column of A in column major format, size [n*n] - * @param n number of elements in the new row. 
- * @param ld stride of colums in L - * @param workspace device pointer to workspace shall be nullptr ar an array - * of size [n_bytes]. - * @param n_bytes size of workspace is returned here if workspace==nullptr. - * @param stream CUDA stream - * @param uplo indicates whether L is stored as an upper or lower triangular - * matrix (CUBLAS_FILL_MODE_UPPER or CUBLAS_FILL_MODE_LOWER) - * @param eps numerical parameter that can act as a regularizer for ill - * conditioned systems. Negative values mean no regularizaton. - */ -template -void choleskyRank1Update(const raft::handle_t& handle, - math_t* L, - int n, - int ld, - void* workspace, - int* n_bytes, - cublasFillMode_t uplo, - cudaStream_t stream, - math_t eps = -1) -{ - detail::choleskyRank1Update(handle, L, n, ld, workspace, n_bytes, uplo, stream, eps); -} -}; // namespace linalg -}; // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "cholesky_r1_update.cuh" diff --git a/cpp/include/raft/linalg/coalesced_reduction.hpp b/cpp/include/raft/linalg/coalesced_reduction.hpp index 48f8798a03..8631a7e5ba 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.hpp +++ b/cpp/include/raft/linalg/coalesced_reduction.hpp @@ -18,64 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __COALESCED_REDUCTION_H -#define __COALESCED_REDUCTION_H - -#pragma once - -#include "detail/coalesced_reduction.cuh" - -namespace raft { -namespace linalg { - /** - * @brief Compute reduction of the input matrix along the leading dimension - * - * @tparam InType the data type of the input - * @tparam OutType the data type of the output (as well as the data type for - * which reduction is performed) - * @tparam IdxType data type of the indices of the array - * @tparam MainLambda Unary lambda applied while acculumation (eg: L1 or L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*MainLambda)(InType, IdxType);
- * @tparam ReduceLambda Binary lambda applied for reduction (eg: addition(+) for L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*ReduceLambda)(OutType);
- * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*FinalLambda)(OutType);
- * @param dots the output reduction vector - * @param data the input matrix - * @param D leading dimension of data - * @param N second dimension data - * @param init initial value to use for the reduction - * @param main_op elementwise operation to apply before reduction - * @param reduce_op binary reduction operation - * @param final_op elementwise operation to apply before storing results - * @param inplace reduction result added inplace or overwrites old values? - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use coalesced_reduction.cuh instead */ -template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> -void coalescedReduction(OutType* dots, - const InType* data, - int D, - int N, - OutType init, - cudaStream_t stream, - bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) -{ - detail::coalescedReduction(dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op); -} -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "coalesced_reduction.cuh" diff --git a/cpp/include/raft/linalg/contractions.hpp b/cpp/include/raft/linalg/contractions.hpp index 256593d9ae..7e5e9be403 100644 --- a/cpp/include/raft/linalg/contractions.hpp +++ b/cpp/include/raft/linalg/contractions.hpp @@ -18,199 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __CONTRACTIONS_H -#define __CONTRACTIONS_H - -#pragma once - -#include "detail/contractions.cuh" - -namespace raft { -namespace linalg { - -/** - * @brief This is the central enum that should be used to configure the perf - * landscape of the Contraction kernel. 
- * - * Main goal of this Policy struct is to provide sufficient knobs to tune the - * perf of Contraction kernel, as and when we see matrices of different shapes. - * - * @tparam DataT the IO and math datatype - * @tparam _veclen number of k-elements loaded by each thread for every LDG call - * it makes. This should be configured based on the input 'k' - * value and the input data type. For eg: if DataT = float and - * k is multiples of 4, then setting this to 4 gives the best - * LDG pattern. Possible values are {1, 2, 4}. - * @tparam _kblk number of k-elements operated upon per main-loop iteration. - * Therefore total number of main-loop iterations will be - * `ceil(k/_kblk)`. This must be multiples of `_veclen`. Do note - * that bigger this value, the greater shared mem requirement. - * @tparam _rpt Defines the number of rows that a given thread accumulates on. - * This directly results in increased register pressure. This - * also is used to compute the number of m-elements worked upon - * by each thread block. - * @tparam _cpt Defines the number of cols that a given thread accumulates on. - * This directly results in increased register pressure. This - * also is used to compute the number of n-elements worked upon - * by each thread block. - * @tparam _tr Number of threads working on the same output column. This is - * used to compute the number of m-elements worked upon by each - * thread block. This also determines the number of threads per - * thread block - * @tparam _tc Number of threads working on the same output row. This is - * used to compute the number of m-elements worked upon by each - * thread block. 
This also determines the number of threads per - * thread block - */ -template -struct KernelPolicy { - enum { - /** number of elements along K worked upon per main loop iteration */ - Kblk = _kblk, - /** number of elements loaded per LDG */ - Veclen = _veclen, - /** number of rows a thread works on for accumulation */ - AccRowsPerTh = _rpt, - /** number of cols a thread works on for accumulation */ - AccColsPerTh = _cpt, - /** number of threads working the same output col */ - AccThRows = _tr, - /** number of threads working the same output row */ - AccThCols = _tc, - /** total threads per block */ - Nthreads = AccThRows * AccThCols, - /** output tile size along rows */ - Mblk = AccRowsPerTh * AccThRows, - /** output tile size along cols */ - Nblk = AccColsPerTh * AccThCols, - /** number of threads loading a single row */ - LdgThRow = Kblk / Veclen, - /** number of LDGs issued by a single thread for X */ - LdgPerThX = Mblk * LdgThRow / Nthreads, - /** number of LDGs issued by a single thread for Y */ - LdgPerThY = Nblk * LdgThRow / Nthreads, - /** number of rows of X covered per LDG */ - LdgRowsX = Mblk / LdgPerThX, - /** number of rows of Y covered per LDG */ - LdgRowsY = Nblk / LdgPerThY, - /** stride for accessing X/Y data in shared mem */ - SmemStride = Kblk + Veclen, - /** size of one page for storing X data */ - SmemPageX = SmemStride * Mblk, - /** size of one page for storing Y data */ - SmemPageY = SmemStride * Nblk, - /** size of one smem page */ - SmemPage = SmemPageX + SmemPageY, - /** size (in B) for smem needed */ - SmemSize = 2 * SmemPage * sizeof(DataT), - }; // enum - -}; // struct KernelPolicy - -template -struct ColKernelPolicy { - enum { - /** number of elements along K worked upon per main loop iteration */ - Kblk = _kblk, - /** number of elements loaded per LDG */ - Veclen = _veclen, - /** number of rows a thread works on for accumulation */ - AccRowsPerTh = _rpt, - /** number of cols a thread works on for accumulation */ - AccColsPerTh = 
_cpt, - /** number of threads working the same output col */ - AccThRows = _tr, - /** number of threads working the same output row */ - AccThCols = _tc, - /** total threads per block */ - Nthreads = AccThRows * AccThCols, - /** output tile size along rows */ - Mblk = AccRowsPerTh * AccThRows, - /** output tile size along cols */ - Nblk = AccColsPerTh * AccThCols, - /** number of threads loading a single col */ - LdgThRow = Mblk / Veclen, - /** number of LDGs issued by a single thread for X */ - LdgPerThX = Kblk * LdgThRow / Nthreads, - /** number of LDGs issued by a single thread for Y */ - LdgPerThY = Kblk * LdgThRow / Nthreads, - /** number of rows of X covered per LDG */ - LdgRowsX = Kblk / LdgPerThX, - /** number of rows of Y covered per LDG */ - LdgRowsY = Kblk / LdgPerThY, - /** stride for accessing X/Y data in shared mem */ - SmemStride = Mblk + Veclen, - /** size of one page for storing X data */ - SmemPageX = SmemStride * Kblk, - /** size of one page for storing Y data */ - SmemPageY = SmemStride * Kblk, - /** size of one smem page */ - SmemPage = SmemPageX + SmemPageY, - /** size (in B) for smem needed */ - SmemSize = 2 * SmemPage * sizeof(DataT), - }; // colMajor enum - static_assert(Mblk == Nblk, "Mblk should be equal to Nblk"); -}; /** - * @defgroup Policy4x4 16 elements per thread Policy with k-block = 32 - * @{ + * DISCLAIMER: this file is deprecated: use contractions.cuh instead */ -template -struct Policy4x4 { -}; - -template -struct Policy4x4 { - typedef KernelPolicy Policy; - typedef ColKernelPolicy ColPolicy; -}; -template -struct Policy4x4 { - typedef KernelPolicy Policy; - typedef ColKernelPolicy ColPolicy; -}; -/** @} */ - -/** - * @defgroup Policy2x8 16 elements per thread Policy with k-block = 16 - * @{ - */ -template -struct Policy2x8 { -}; - -template -struct Policy2x8 { - typedef KernelPolicy Policy; - typedef ColKernelPolicy ColPolicy; -}; - -template -struct Policy2x8 { - // this is not used just for keeping compiler happy. 
- typedef KernelPolicy Policy; - typedef ColKernelPolicy ColPolicy; -}; -/** @} */ - -/** - * @brief Base class for gemm-like NT contractions - * - * This class does not provide any arithmetic operations, but only provides the - * memory-related operations of loading the `x` and `y` matrix blocks from the - * global memory into shared memory and then from shared into registers. Thus, - * this class acts as a basic building block for further composing gemm-like NT - * contractions on input matrices which are row-major (and so does the output) - * - * @tparam DataT IO and math data type - * @tparam IdxT indexing type - * @tparam Policy policy used to customize memory access behavior. - * See documentation for `KernelPolicy` to know more. - */ -using detail::Contractions_NT; +#pragma once -} // namespace linalg -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "contractions.cuh" diff --git a/cpp/include/raft/linalg/divide.hpp b/cpp/include/raft/linalg/divide.hpp index 8d1bd37186..57f4376fcc 100644 --- a/cpp/include/raft/linalg/divide.hpp +++ b/cpp/include/raft/linalg/divide.hpp @@ -18,37 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __DIVIDE_H -#define __DIVIDE_H - -#pragma once - -#include "detail/divide.cuh" - -namespace raft { -namespace linalg { - -using detail::divides_scalar; - /** - * @defgroup ScalarOps Scalar operations on the input buffer - * @tparam math_t data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param out the output buffer - * @param in the input buffer - * @param scalar the scalar used in the operations - * @param len number of elements in the input buffer - * @param stream cuda stream where to launch work - * @{ + * DISCLAIMER: this file is deprecated: use divide.cuh instead */ -template -void divideScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, cudaStream_t stream) -{ - detail::divideScalar(out, in, scalar, len, stream); -} -/** @} */ -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "divide.cuh" diff --git a/cpp/include/raft/linalg/eig.hpp b/cpp/include/raft/linalg/eig.hpp index 032c4e97f9..175a2aaccc 100644 --- a/cpp/include/raft/linalg/eig.hpp +++ b/cpp/include/raft/linalg/eig.hpp @@ -18,108 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __EIG_H -#define __EIG_H - -#pragma once - -#include "detail/eig.cuh" - -namespace raft { -namespace linalg { - -/** - * @defgroup eig Eigen Decomposition Methods - * @{ - */ - /** - * @brief eig decomp with divide and conquer method for the column-major - * symmetric matrices - * @param handle raft handle - * @param in the input buffer (symmetric matrix that has real eig values and - * vectors. 
- * @param n_rows: number of rows of the input - * @param n_cols: number of cols of the input - * @param eig_vectors: eigenvectors - * @param eig_vals: eigen values - * @param stream cuda stream + * DISCLAIMER: this file is deprecated: use eig.cuh instead */ -template -void eigDC(const raft::handle_t& handle, - const math_t* in, - std::size_t n_rows, - std::size_t n_cols, - math_t* eig_vectors, - math_t* eig_vals, - cudaStream_t stream) -{ - detail::eigDC(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream); -} -using detail::COPY_INPUT; -using detail::EigVecMemUsage; -using detail::OVERWRITE_INPUT; - -/** - * @brief eig sel decomp with divide and conquer method for the column-major - * symmetric matrices - * @param handle raft handle - * @param in the input buffer (symmetric matrix that has real eig values and - * vectors. - * @param n_rows: number of rows of the input - * @param n_cols: number of cols of the input - * @param n_eig_vals: number of eigenvectors to be generated - * @param eig_vectors: eigenvectors - * @param eig_vals: eigen values - * @param memUsage: the memory selection for eig vector output - * @param stream cuda stream - */ -template -void eigSelDC(const raft::handle_t& handle, - math_t* in, - int n_rows, - int n_cols, - int n_eig_vals, - math_t* eig_vectors, - math_t* eig_vals, - EigVecMemUsage memUsage, - cudaStream_t stream) -{ - detail::eigSelDC(handle, in, n_rows, n_cols, n_eig_vals, eig_vectors, eig_vals, memUsage, stream); -} - -/** - * @brief overloaded function for eig decomp with Jacobi method for the - * column-major symmetric matrices (in parameter) - * @param handle: raft handle - * @param in: input matrix - * @param n_rows: number of rows of the input - * @param n_cols: number of cols of the input - * @param eig_vectors: eigenvectors - * @param eig_vals: eigen values - * @param stream: stream on which this function will be run - * @param tol: error tolerance for the jacobi method. 
Algorithm stops when the - * error is below tol - * @param sweeps: number of sweeps in the Jacobi algorithm. The more the better - * accuracy. - */ -template -void eigJacobi(const raft::handle_t& handle, - const math_t* in, - int n_rows, - int n_cols, - math_t* eig_vectors, - math_t* eig_vals, - cudaStream_t stream, - math_t tol = 1.e-7, - int sweeps = 15) -{ - detail::eigJacobi(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream, tol, sweeps); -} -/** @} */ // end of eig +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "eig.cuh" diff --git a/cpp/include/raft/linalg/eltwise.hpp b/cpp/include/raft/linalg/eltwise.hpp index 62624f6eeb..8931c88241 100644 --- a/cpp/include/raft/linalg/eltwise.hpp +++ b/cpp/include/raft/linalg/eltwise.hpp @@ -18,94 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __ELTWISE_H -#define __ELTWISE_H - -#pragma once - -#include "detail/eltwise.cuh" - -namespace raft { -namespace linalg { - -using detail::adds_scalar; - -/** - * @defgroup ScalarOps Scalar operations on the input buffer - * @tparam InType data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param out the output buffer - * @param in the input buffer - * @param scalar the scalar used in the operations - * @param len number of elements in the input buffer - * @param stream cuda stream where to launch work - * @{ - */ -template -void scalarAdd(OutType* out, const InType* in, InType scalar, IdxType len, cudaStream_t stream) -{ - detail::scalarAdd(out, in, scalar, len, stream); -} - -using detail::multiplies_scalar; - -template -void scalarMultiply(OutType* out, const InType* in, InType scalar, IdxType len, cudaStream_t stream) -{ - detail::scalarMultiply(out, in, scalar, len, stream); -} -/** @} */ - /** - * @defgroup BinaryOps Element-wise binary operations on the input buffers - * @tparam InType data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param out the output buffer - * @param in1 the first input buffer - * @param in2 the second input buffer - * @param len number of elements in the input buffers - * @param stream cuda stream where to launch work - * @{ + * DISCLAIMER: this file is deprecated: use eltwise.cuh instead */ -template -void eltwiseAdd( - OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) -{ - detail::eltwiseAdd(out, in1, in2, len, stream); -} - -template -void eltwiseSub( - OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) -{ - detail::eltwiseSub(out, in1, in2, len, stream); -} -template -void eltwiseMultiply( - OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) -{ - detail::eltwiseMultiply(out, in1, 
in2, len, stream); -} - -template -void eltwiseDivide( - OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) -{ - detail::eltwiseDivide(out, in1, in2, len, stream); -} - -using detail::divides_check_zero; - -template -void eltwiseDivideCheckZero( - OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) -{ - detail::eltwiseDivideCheckZero(out, in1, in2, len, stream); -} -/** @} */ +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "eltwise.cuh" diff --git a/cpp/include/raft/linalg/gemm.hpp b/cpp/include/raft/linalg/gemm.hpp index 37c6b2d552..6ad2f1fbe1 100644 --- a/cpp/include/raft/linalg/gemm.hpp +++ b/cpp/include/raft/linalg/gemm.hpp @@ -18,167 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __GEMM_H -#define __GEMM_H - -#pragma once - -#include "detail/gemm.hpp" - -namespace raft { -namespace linalg { - /** - * @brief the wrapper of cublas gemm function - * It computes the following equation: C = alpha .* opA(A) * opB(B) + beta .* C - * - * @tparam math_t the element type - * @tparam DevicePointerMode whether pointers alpha, beta point to device memory - * @param [in] handle raft handle - * @param [in] trans_a cublas transpose op for A - * @param [in] trans_b cublas transpose op for B - * @param [in] m number of rows of C - * @param [in] n number of columns of C - * @param [in] k number of rows of opB(B) / number of columns of opA(A) - * @param [in] alpha host or device scalar - * @param [in] A such a matrix that the shape of column-major opA(A) is [m, k] - * @param [in] lda leading dimension of A - * @param [in] B such a matrix that the shape of column-major opA(B) is [k, n] - * @param [in] ldb leading dimension of B - * @param [in] beta host or device scalar - * @param [inout] C column-major 
matrix of size [m, n] - * @param [in] ldc leading dimension of C - * @param [in] stream + * DISCLAIMER: this file is deprecated: use gemm.cuh instead */ -template -void gemm(const raft::handle_t& handle, - const bool trans_a, - const bool trans_b, - const int m, - const int n, - const int k, - const math_t* alpha, - const math_t* A, - const int lda, - const math_t* B, - const int ldb, - const math_t* beta, - math_t* C, - const int ldc, - cudaStream_t stream) -{ - detail::gemm( - handle, trans_a, trans_b, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, stream); -} -/** - * @brief the wrapper of cublas gemm function - * It computes the following equation: D = alpha . opA(A) * opB(B) + beta . C - * @tparam math_t the type of input/output matrices - * @param handle raft handle - * @param a input matrix - * @param n_rows_a number of rows of A - * @param n_cols_a number of columns of A - * @param b input matrix - * @param c output matrix - * @param n_rows_c number of rows of C - * @param n_cols_c number of columns of C - * @param trans_a cublas transpose op for A - * @param trans_b cublas transpose op for B - * @param alpha scalar - * @param beta scalar - * @param stream cuda stream - */ -template -void gemm(const raft::handle_t& handle, - const math_t* a, - int n_rows_a, - int n_cols_a, - const math_t* b, - math_t* c, - int n_rows_c, - int n_cols_c, - cublasOperation_t trans_a, - cublasOperation_t trans_b, - math_t alpha, - math_t beta, - cudaStream_t stream) -{ - detail::gemm( - handle, a, n_rows_a, n_cols_a, b, c, n_rows_c, n_cols_c, trans_a, trans_b, alpha, beta, stream); -} - -/** - * @brief the wrapper of cublas gemm function - * It computes the following equation: D = alpha . opA(A) * opB(B) + beta . 
C - * @tparam math_t the type of input/output matrices - * @param handle raft handle - * @param a input matrix - * @param n_rows_a number of rows of A - * @param n_cols_a number of columns of A - * @param b input matrix - * @param c output matrix - * @param n_rows_c number of rows of C - * @param n_cols_c number of columns of C - * @param trans_a cublas transpose op for A - * @param trans_b cublas transpose op for B - * @param stream cuda stream - */ -template -void gemm(const raft::handle_t& handle, - const math_t* a, - int n_rows_a, - int n_cols_a, - const math_t* b, - math_t* c, - int n_rows_c, - int n_cols_c, - cublasOperation_t trans_a, - cublasOperation_t trans_b, - cudaStream_t stream) -{ - detail::gemm(handle, a, n_rows_a, n_cols_a, b, c, n_rows_c, n_cols_c, trans_a, trans_b, stream); -} - -/** - * @brief A wrapper for CUBLS GEMM function designed for handling all possible - * combinations of operand layouts. - * It computes the following equation: Z = alpha . X * Y + beta . Z - * @tparam T Data type of input/output matrices (float/double) - * @param handle raft handle - * @param z output matrix of size M rows x N columns - * @param x input matrix of size M rows x K columns - * @param y input matrix of size K rows x N columns - * @param _M number of rows of X and Z - * @param _N number of rows of Y and columns of Z - * @param _K number of columns of X and rows of Y - * @param isZColMajor Storage layout of Z. true = col major, false = row major - * @param isXColMajor Storage layout of X. true = col major, false = row major - * @param isYColMajor Storage layout of Y. 
true = col major, false = row major - * @param stream cuda stream - * @param alpha scalar - * @param beta scalar - */ -template -void gemm(const raft::handle_t& handle, - T* z, - T* x, - T* y, - int _M, - int _N, - int _K, - bool isZColMajor, - bool isXColMajor, - bool isYColMajor, - cudaStream_t stream, - T alpha = T(1.0), - T beta = T(0.0)) -{ - detail::gemm( - handle, z, x, y, _M, _N, _K, isZColMajor, isXColMajor, isYColMajor, stream, alpha, beta); -} +#pragma once -} // end namespace linalg -} // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif +#include "gemm.cuh" diff --git a/cpp/include/raft/linalg/gemv.hpp b/cpp/include/raft/linalg/gemv.hpp index 3b6b60263b..8161631fd3 100644 --- a/cpp/include/raft/linalg/gemv.hpp +++ b/cpp/include/raft/linalg/gemv.hpp @@ -18,200 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __GEMV_H -#define __GEMV_H - -#pragma once - -#include "detail/gemv.hpp" - -namespace raft { -namespace linalg { - -/** - * @brief the wrapper of cublas gemv function - * It computes the following equation: y = alpha .* op(A) * x + beta .* y - * - * @tparam math_t the element type - * @tparam DevicePointerMode whether pointers alpha, beta point to device memory - * @param [in] handle raft handle - * @param [in] trans_a cublas transpose op for A - * @param [in] m number of rows of A - * @param [in] n number of columns of A - * @param [in] alpha host or device scalar - * @param [in] A column-major matrix of size [m, n] - * @param [in] lda leading dimension of A - * @param [in] x vector of length n if trans_a else m - * @param [in] incx stride between consecutive elements of x - * @param [in] beta host or device scalar - * @param [inout] y vector of length m if trans_a else n - * @param [in] incy stride between consecutive elements of y - * @param [in] stream - */ -template -void gemv(const raft::handle_t& handle, - const bool 
trans_a, - const int m, - const int n, - const math_t* alpha, - const math_t* A, - const int lda, - const math_t* x, - const int incx, - const math_t* beta, - math_t* y, - const int incy, - cudaStream_t stream) -{ - detail::gemv( - handle, trans_a, m, n, alpha, A, lda, x, incx, beta, y, incy, stream); -} - -template -void gemv(const raft::handle_t& handle, - const math_t* A, - const int n_rows, - const int n_cols, - const math_t* x, - const int incx, - math_t* y, - const int incy, - const bool trans_a, - const math_t alpha, - const math_t beta, - cudaStream_t stream) -{ - detail::gemv(handle, A, n_rows, n_cols, x, incx, y, incy, trans_a, alpha, beta, stream); -} - -/** - * y = alpha * op(A) * x + beta * y - * - * where - * - * @param handle raft handle - * @param A is a column-major matrix of size n_rows_a * n_cols_a. - * op(A) is either the transpose operation (trans_a == true) or identity. - * @param n_rows_a number of rows in A - * @param n_cols_a number of cols in A - * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. - * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. - * @param trans_a whether to take transpose of a - * @param alpha is a scalar scale of Ax. - * @param beta is a scalar scale of y. - * @param stream stream on which this function is run - */ -template -void gemv(const raft::handle_t& handle, - const math_t* A, - const int n_rows_a, - const int n_cols_a, - const math_t* x, - math_t* y, - const bool trans_a, - const math_t alpha, - const math_t beta, - cudaStream_t stream) -{ - detail::gemv(handle, A, n_rows_a, n_cols_a, x, y, trans_a, alpha, beta, stream); -} - /** - * y = op(A) * x - * - * where - * - * @param handle raft handle - * @param A is a column-major matrix of size n_rows_a * n_cols_a. - * op(A) is either the transpose operation (trans_a == true) or identity. - * @param n_rows_a number of rows in A - * @param n_cols_a number of cols in A - * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. 
- * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. - * @param trans_a whether to take transpose of a - * @param stream stream on which this function is run + * DISCLAIMER: this file is deprecated: use gemv.cuh instead */ -template -void gemv(const raft::handle_t& handle, - const math_t* A, - const int n_rows_a, - const int n_cols_a, - const math_t* x, - math_t* y, - const bool trans_a, - cudaStream_t stream) -{ - detail::gemv(handle, A, n_rows_a, n_cols_a, x, y, trans_a, stream); -} -/** - * y = alpha * op(A) * x + beta * y - * - * where - * @param handle raft handle - * @param A is a column-major matrix of size n_rows_a * n_cols_a. - * op(A) is either the transpose operation (trans_a == true) or identity. - * @param n_rows_a number of rows in A - * @param n_cols_a number of cols in A - * @param lda is the leading dimension of A (number of rows); lda must be not smaller than n_rows_a. - * set it when you need to use only the first n_rows_a rows of the matrix A, which has - * (perhaps, due to padding) lda rows. - * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. - * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. - * @param trans_a whether to take transpose of a - * @param alpha is a scalar scale of Ax. - * @param beta is a scalar scale of y. - * @param stream stream on which this function is run - */ -template -void gemv(const raft::handle_t& handle, - const math_t* A, - const int n_rows_a, - const int n_cols_a, - const int lda, - const math_t* x, - math_t* y, - const bool trans_a, - const math_t alpha, - const math_t beta, - cudaStream_t stream) -{ - detail::gemv(handle, A, n_rows_a, n_cols_a, lda, x, y, trans_a, alpha, beta, stream); -} - -/** - * y = op(A) * x - * - * where - * @param handle raft handle - * @param A is a column-major matrix of size n_rows_a * n_cols_a. - * op(A) is either the transpose operation (trans_a == true) or identity. 
- * @param n_rows_a number of rows in A - * @param n_cols_a number of cols in A - * @param lda is the leading dimension of A (number of rows); lda must be not smaller than n_rows_a. - * set it when you need to use only the first n_rows_a rows of the matrix A, which has - * (perhaps, due to padding) lda rows. - * @param x is a vector of size `trans_a ? n_rows_a : n_cols_a`. - * @param y is a vector of size `trans_a ? n_cols_a : n_rows_a`. - * @param trans_a whether to take transpose of a - * @param stream stream on which this function is run - * - */ -template -void gemv(const raft::handle_t& handle, - const math_t* A, - const int n_rows_a, - const int n_cols_a, - const int lda, - const math_t* x, - math_t* y, - const bool trans_a, - cudaStream_t stream) -{ - detail::gemv(handle, A, n_rows_a, n_cols_a, lda, x, y, trans_a, stream); -} +#pragma once -}; // namespace linalg -}; // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "gemv.cuh" diff --git a/cpp/include/raft/linalg/init.hpp b/cpp/include/raft/linalg/init.hpp index db7b0f9cfe..9c59c886c9 100644 --- a/cpp/include/raft/linalg/init.hpp +++ b/cpp/include/raft/linalg/init.hpp @@ -18,48 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __INIT_H -#define __INIT_H - -#pragma once - -#include "detail/init.hpp" - -namespace raft { -namespace linalg { - /** - * @brief Like Python range. - * - * Fills the output as out[i] = i. - * - * \param [out] out device array, size [end-start] - * \param [in] start of the range - * \param [in] end of range (exclusive) - * \param [in] stream cuda stream + * DISCLAIMER: this file is deprecated: use init.cuh instead */ -template -void range(T* out, int start, int end, cudaStream_t stream) -{ - detail::range(out, start, end, stream); -} -/** - * @brief Like Python range. - * - * Fills the output as out[i] = i. 
- * - * \param [out] out device array, size [n] - * \param [in] n length of the array - * \param [in] stream cuda stream - */ -template -void range(T* out, int n, cudaStream_t stream) -{ - detail::range(out, n, stream); -} +#pragma once -} // namespace linalg -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "init.cuh" diff --git a/cpp/include/raft/linalg/lanczos.hpp b/cpp/include/raft/linalg/lanczos.hpp index 75e3d11444..0529db6b5b 100644 --- a/cpp/include/raft/linalg/lanczos.hpp +++ b/cpp/include/raft/linalg/lanczos.hpp @@ -18,150 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __LANCZOS_H -#define __LANCZOS_H - -#pragma once - -#include "detail/lanczos.cuh" -#include - -namespace raft { -namespace linalg { - -// ========================================================= -// Eigensolver -// ========================================================= - /** - * @brief Compute smallest eigenvectors of symmetric matrix - * Computes eigenvalues and eigenvectors that are least - * positive. If matrix is positive definite or positive - * semidefinite, the computed eigenvalues are smallest in - * magnitude. - * The largest eigenvalue is estimated by performing several - * Lanczos iterations. An implicitly restarted Lanczos method is - * then applied to A+s*I, where s is negative the largest - * eigenvalue. - * @tparam index_type_t the type of data used for indexing. - * @tparam value_type_t the type of data used for weights, distances. - * @param handle the raft handle. - * @param A Matrix. - * @param nEigVecs Number of eigenvectors to compute. - * @param maxIter Maximum number of Lanczos steps. Does not include - * Lanczos steps used to estimate largest eigenvalue. - * @param restartIter Maximum size of Lanczos system before - * performing an implicit restart. Should be at least 4. 
- * @param tol Convergence tolerance. Lanczos iteration will - * terminate when the residual norm is less than tol*theta, where - * theta is an estimate for the smallest unwanted eigenvalue - * (i.e. the (nEigVecs+1)th smallest eigenvalue). - * @param reorthogonalize Whether to reorthogonalize Lanczos - * vectors. - * @param iter On exit, pointer to total number of Lanczos - * iterations performed. Does not include Lanczos steps used to - * estimate largest eigenvalue. - * @param eigVals_dev (Output, device memory, nEigVecs entries) - * Smallest eigenvalues of matrix. - * @param eigVecs_dev (Output, device memory, n*nEigVecs entries) - * Eigenvectors corresponding to smallest eigenvalues of - * matrix. Vectors are stored as columns of a column-major matrix - * with dimensions n x nEigVecs. - * @param seed random seed. - * @return error flag. + * DISCLAIMER: this file is deprecated: use lanczos.cuh instead */ -template -int computeSmallestEigenvectors( - handle_t const& handle, - spectral::matrix::sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 1234567) -{ - return detail::computeSmallestEigenvectors(handle, - A, - nEigVecs, - maxIter, - restartIter, - tol, - reorthogonalize, - iter, - eigVals_dev, - eigVecs_dev, - seed); -} -/** - * @brief Compute largest eigenvectors of symmetric matrix - * Computes eigenvalues and eigenvectors that are least - * positive. If matrix is positive definite or positive - * semidefinite, the computed eigenvalues are largest in - * magnitude. - * The largest eigenvalue is estimated by performing several - * Lanczos iterations. An implicitly restarted Lanczos method is - * then applied to A+s*I, where s is negative the largest - * eigenvalue. - * @tparam index_type_t the type of data used for indexing. 
- * @tparam value_type_t the type of data used for weights, distances. - * @param handle the raft handle. - * @param A Matrix. - * @param nEigVecs Number of eigenvectors to compute. - * @param maxIter Maximum number of Lanczos steps. Does not include - * Lanczos steps used to estimate largest eigenvalue. - * @param restartIter Maximum size of Lanczos system before - * performing an implicit restart. Should be at least 4. - * @param tol Convergence tolerance. Lanczos iteration will - * terminate when the residual norm is less than tol*theta, where - * theta is an estimate for the largest unwanted eigenvalue - * (i.e. the (nEigVecs+1)th largest eigenvalue). - * @param reorthogonalize Whether to reorthogonalize Lanczos - * vectors. - * @param iter On exit, pointer to total number of Lanczos - * iterations performed. Does not include Lanczos steps used to - * estimate largest eigenvalue. - * @param eigVals_dev (Output, device memory, nEigVecs entries) - * Largest eigenvalues of matrix. - * @param eigVecs_dev (Output, device memory, n*nEigVecs entries) - * Eigenvectors corresponding to largest eigenvalues of - * matrix. Vectors are stored as columns of a column-major matrix - * with dimensions n x nEigVecs. - * @param seed random seed. - * @return error flag. - */ -template -int computeLargestEigenvectors( - handle_t const& handle, - spectral::matrix::sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 123456) -{ - return detail::computeLargestEigenvectors(handle, - A, - nEigVecs, - maxIter, - restartIter, - tol, - reorthogonalize, - iter, - eigVals_dev, - eigVecs_dev, - seed); -} +#pragma once -} // namespace linalg -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "lanczos.cuh" diff --git a/cpp/include/raft/linalg/lstsq.hpp b/cpp/include/raft/linalg/lstsq.hpp index f90cd00ea3..3dfbea0629 100644 --- a/cpp/include/raft/linalg/lstsq.hpp +++ b/cpp/include/raft/linalg/lstsq.hpp @@ -18,109 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __LSTSQ_H -#define __LSTSQ_H - -#pragma once - -#include -#include -namespace raft { -namespace linalg { - -/** Solves the linear ordinary least squares problem `Aw = b` - * Via SVD decomposition of `A = U S Vt` using default cuSOLVER routine. - * - * @param[in] handle raft handle - * @param[inout] A input feature matrix. - * Warning: the content of this matrix is modified by the cuSOLVER routines. - * @param[in] n_rows number of rows in A - * @param[in] n_cols number of columns in A - * @param[inout] b input target vector. - * Warning: the content of this vector is modified by the cuSOLVER routines. - * @param[out] w output coefficient vector - * @param[in] stream cuda stream for ordering operations - */ -template -void lstsqSvdQR(const raft::handle_t& handle, - math_t* A, - const int n_rows, - const int n_cols, - const math_t* b, - math_t* w, - cudaStream_t stream) -{ - detail::lstsqSvdQR(handle, A, n_rows, n_cols, b, w, stream); -} - -/** Solves the linear ordinary least squares problem `Aw = b` - * Via SVD decomposition of `A = U S V^T` using Jacobi iterations (cuSOLVER). - * - * @param[in] handle raft handle - * @param[inout] A input feature matrix. - * Warning: the content of this matrix is modified by the cuSOLVER routines. - * @param[in] n_rows number of rows in A - * @param[in] n_cols number of columns in A - * @param[inout] b input target vector. - * Warning: the content of this vector is modified by the cuSOLVER routines. 
- * @param[out] w output coefficient vector - * @param[in] stream cuda stream for ordering operations - */ -template -void lstsqSvdJacobi(const raft::handle_t& handle, - math_t* A, - const int n_rows, - const int n_cols, - const math_t* b, - math_t* w, - cudaStream_t stream) -{ - detail::lstsqSvdJacobi(handle, A, n_rows, n_cols, b, w, stream); -} - -/** Solves the linear ordinary least squares problem `Aw = b` - * via eigenvalue decomposition of `A^T * A` (covariance matrix for dataset A). - * (`w = (A^T A)^-1 A^T b`) +/** + * DISCLAIMER: this file is deprecated: use lstsq.cuh instead */ -template -void lstsqEig(const raft::handle_t& handle, - const math_t* A, - const int n_rows, - const int n_cols, - const math_t* b, - math_t* w, - cudaStream_t stream) -{ - detail::lstsqEig(handle, A, n_rows, n_cols, b, w, stream); -} -/** Solves the linear ordinary least squares problem `Aw = b` - * via QR decomposition of `A = QR`. - * (triangular system of equations `Rw = Q^T b`) - * - * @param[in] handle raft handle - * @param[inout] A input feature matrix. - * Warning: the content of this matrix is modified by the cuSOLVER routines. - * @param[in] n_rows number of rows in A - * @param[in] n_cols number of columns in A - * @param[inout] b input target vector. - * Warning: the content of this vector is modified by the cuSOLVER routines. - * @param[out] w output coefficient vector - * @param[in] stream cuda stream for ordering operations - */ -template -void lstsqQR(const raft::handle_t& handle, - math_t* A, - const int n_rows, - const int n_cols, - math_t* b, - math_t* w, - cudaStream_t stream) -{ - detail::lstsqQR(handle, A, n_rows, n_cols, b, w, stream); -} +#pragma once -}; // namespace linalg -}; // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "lstsq.cuh" diff --git a/cpp/include/raft/linalg/map_then_reduce.hpp b/cpp/include/raft/linalg/map_then_reduce.hpp index 235485926b..6502a84edb 100644 --- a/cpp/include/raft/linalg/map_then_reduce.hpp +++ b/cpp/include/raft/linalg/map_then_reduce.hpp @@ -18,79 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __MAP_THEN_REDUCE_H -#define __MAP_THEN_REDUCE_H - -#pragma once - -#include "detail/map_then_reduce.cuh" - -namespace raft { -namespace linalg { - /** - * @brief CUDA version of map and then sum reduction operation - * @tparam Type data-type upon which the math operation will be performed - * @tparam MapOp the device-lambda performing the actual operation - * @tparam TPB threads-per-block in the final kernel launched - * @tparam Args additional parameters - * @param out the output sum-reduced value (assumed to be a device pointer) - * @param len number of elements in the input array - * @param map the device-lambda - * @param stream cuda-stream where to launch this kernel - * @param in the input array - * @param args additional input arrays + * DISCLAIMER: this file is deprecated: use map_then_reduce.cuh instead */ -template -void mapThenSumReduce( - OutType* out, size_t len, MapOp map, cudaStream_t stream, const InType* in, Args... 
args) -{ - detail::mapThenReduceImpl( - out, len, (OutType)0, map, detail::sum_tag(), stream, in, args...); -} - -/** - * @brief CUDA version of map and then generic reduction operation - * @tparam Type data-type upon which the math operation will be performed - * @tparam MapOp the device-lambda performing the actual map operation - * @tparam ReduceLambda the device-lambda performing the actual reduction - * @tparam TPB threads-per-block in the final kernel launched - * @tparam Args additional parameters - * @param out the output reduced value (assumed to be a device pointer) - * @param len number of elements in the input array - * @param neutral The neutral element of the reduction operation. For example: - * 0 for sum, 1 for multiply, +Inf for Min, -Inf for Max - * @param map the device-lambda - * @param op the reduction device lambda - * @param stream cuda-stream where to launch this kernel - * @param in the input array - * @param args additional input arrays - */ +#pragma once -template -void mapThenReduce(OutType* out, - size_t len, - OutType neutral, - MapOp map, - ReduceLambda op, - cudaStream_t stream, - const InType* in, - Args... args) -{ - detail::mapThenReduceImpl( - out, len, neutral, map, op, stream, in, args...); -} -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "map_then_reduce.cuh" diff --git a/cpp/include/raft/linalg/matrix_vector_op.hpp b/cpp/include/raft/linalg/matrix_vector_op.hpp index 574d4aee63..1237961ceb 100644 --- a/cpp/include/raft/linalg/matrix_vector_op.hpp +++ b/cpp/include/raft/linalg/matrix_vector_op.hpp @@ -18,93 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __MATRIX_VECTOR_OP_H -#define __MATRIX_VECTOR_OP_H - -#pragma once - -#include "detail/matrix_vector_op.cuh" - -namespace raft { -namespace linalg { - /** - * @brief Operations for all the columns or rows with a given vector. - * Caution : Threads process multiple elements to speed up processing. These - * are loaded in a single read thanks to type promotion. Faster processing - * would thus only be enabled when adresses are optimally aligned for it. - * Note : the function will also check that the size of the window of accesses - * is a multiple of the number of elements processed by a thread in order to - * enable faster processing - * @tparam Type the matrix/vector type - * @tparam Lambda a device function which represents a binary operator - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads per block of the cuda kernel launched - * @param out the output matrix (passing out = matrix makes it in-place) - * @param matrix the input matrix - * @param vec the vector - * @param D number of columns of matrix - * @param N number of rows of matrix - * @param rowMajor whether input is row or col major - * @param bcastAlongRows whether the broadcast of vector needs to happen along - * the rows of the matrix or columns - * @param op the mathematical operation - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use matrix_vector_op.cuh instead */ -template -void matrixVectorOp(Type* out, - const Type* matrix, - const Type* vec, - IdxType D, - IdxType N, - bool rowMajor, - bool bcastAlongRows, - Lambda op, - cudaStream_t stream) -{ - detail::matrixVectorOp(out, matrix, vec, D, N, rowMajor, bcastAlongRows, op, stream); -} -/** - * @brief Operations for all the columns or rows with the given vectors. - * Caution : Threads process multiple elements to speed up processing. These - * are loaded in a single read thanks to type promotion. 
Faster processing - * would thus only be enabled when adresses are optimally aligned for it. - * Note : the function will also check that the size of the window of accesses - * is a multiple of the number of elements processed by a thread in order to - * enable faster processing - * @tparam Type the matrix/vector type - * @tparam Lambda a device function which represents a binary operator - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads per block of the cuda kernel launched - * @param out the output matrix (passing out = matrix makes it in-place) - * @param matrix the input matrix - * @param vec1 the first vector - * @param vec2 the second vector - * @param D number of columns of matrix - * @param N number of rows of matrix - * @param rowMajor whether input is row or col major - * @param bcastAlongRows whether the broadcast of vector needs to happen along - * the rows of the matrix or columns - * @param op the mathematical operation - * @param stream cuda stream where to launch work - */ -template -void matrixVectorOp(Type* out, - const Type* matrix, - const Type* vec1, - const Type* vec2, - IdxType D, - IdxType N, - bool rowMajor, - bool bcastAlongRows, - Lambda op, - cudaStream_t stream) -{ - detail::matrixVectorOp(out, matrix, vec1, vec2, D, N, rowMajor, bcastAlongRows, op, stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "matrix_vector_op.cuh" diff --git a/cpp/include/raft/linalg/mean_squared_error.hpp b/cpp/include/raft/linalg/mean_squared_error.hpp index 7a7f03ee18..cbb974e516 100644 --- a/cpp/include/raft/linalg/mean_squared_error.hpp +++ b/cpp/include/raft/linalg/mean_squared_error.hpp @@ -18,35 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __MSE_H -#define __MSE_H - -#pragma once - -#include "detail/mean_squared_error.cuh" - -namespace raft { -namespace linalg { - /** - * @brief CUDA version mean squared error function mean((A-B)**2) - * @tparam math_t data-type upon which the math operation will be performed - * @tparam TPB threads-per-block - * @param out the output mean squared error value (assumed to be a device pointer) - * @param A input array (assumed to be a device pointer) - * @param B input array (assumed to be a device pointer) - * @param len number of elements in the input arrays - * @param weight weight to apply to every term in the mean squared error calculation - * @param stream cuda-stream where to launch this kernel + * DISCLAIMER: this file is deprecated: use mean_squared_error.cuh instead */ -template -void meanSquaredError( - math_t* out, const math_t* A, const math_t* B, size_t len, math_t weight, cudaStream_t stream) -{ - detail::meanSquaredError(out, A, B, len, weight, stream); -} -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "mean_squared_error.cuh" diff --git a/cpp/include/raft/linalg/multiply.hpp b/cpp/include/raft/linalg/multiply.hpp index eb933cd607..5aa481a894 100644 --- a/cpp/include/raft/linalg/multiply.hpp +++ b/cpp/include/raft/linalg/multiply.hpp @@ -18,35 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __MULTIPLY_H -#define __MULTIPLY_H - -#pragma once - -#include "detail/multiply.cuh" - -namespace raft { -namespace linalg { - /** - * @defgroup ScalarOps Scalar operations on the input buffer - * @tparam math_t data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param out the output buffer - * @param in the input buffer - * @param scalar the scalar used in the operations - * @param len number of elements in the input buffer - * @param stream cuda stream where to launch work - * @{ + * DISCLAIMER: this file is deprecated: use multiply.cuh instead */ -template -void multiplyScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, cudaStream_t stream) -{ - detail::multiplyScalar(out, in, scalar, len, stream); -} -/** @} */ -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "multiply.cuh" diff --git a/cpp/include/raft/linalg/norm.hpp b/cpp/include/raft/linalg/norm.hpp index 958784d67e..b750367f05 100644 --- a/cpp/include/raft/linalg/norm.hpp +++ b/cpp/include/raft/linalg/norm.hpp @@ -18,82 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __NORM_H -#define __NORM_H - -#pragma once - -#include "detail/norm.cuh" - -namespace raft { -namespace linalg { - -/** different types of norms supported on the input buffers */ -using detail::L1Norm; -using detail::L2Norm; -using detail::NormType; - /** - * @brief Compute row-wise norm of the input matrix and perform fin_op lambda - * - * Row-wise norm is useful while computing pairwise distance matrix, for - * example. - * This is used in many clustering algos like knn, kmeans, dbscan, etc... The - * current implementation is optimized only for bigger values of 'D'. 
- * - * @tparam Type the data type - * @tparam Lambda device final lambda - * @tparam IdxType Integer type used to for addressing - * @param dots the output vector of row-wise dot products - * @param data the input matrix (currently assumed to be row-major) - * @param D number of columns of data - * @param N number of rows of data - * @param type the type of norm to be applied - * @param rowMajor whether the input is row-major or not - * @param stream cuda stream where to launch work - * @param fin_op the final lambda op + * DISCLAIMER: this file is deprecated: use norm.cuh instead */ -template > -void rowNorm(Type* dots, - const Type* data, - IdxType D, - IdxType N, - NormType type, - bool rowMajor, - cudaStream_t stream, - Lambda fin_op = raft::Nop()) -{ - detail::rowNormCaller(dots, data, D, N, type, rowMajor, stream, fin_op); -} -/** - * @brief Compute column-wise norm of the input matrix and perform fin_op - * @tparam Type the data type - * @tparam Lambda device final lambda - * @tparam IdxType Integer type used to for addressing - * @param dots the output vector of column-wise dot products - * @param data the input matrix (currently assumed to be row-major) - * @param D number of columns of data - * @param N number of rows of data - * @param type the type of norm to be applied - * @param rowMajor whether the input is row-major or not - * @param stream cuda stream where to launch work - * @param fin_op the final lambda op - */ -template > -void colNorm(Type* dots, - const Type* data, - IdxType D, - IdxType N, - NormType type, - bool rowMajor, - cudaStream_t stream, - Lambda fin_op = raft::Nop()) -{ - detail::colNormCaller(dots, data, D, N, type, rowMajor, stream, fin_op); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "norm.cuh" diff --git a/cpp/include/raft/linalg/power.hpp b/cpp/include/raft/linalg/power.hpp index d1506ff7a9..1e4a56d4fb 100644 --- a/cpp/include/raft/linalg/power.hpp +++ b/cpp/include/raft/linalg/power.hpp @@ -18,57 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __POWER_H -#define __POWER_H - -#pragma once - -#include -#include -#include - -namespace raft { -namespace linalg { - /** - * @defgroup ScalarOps Scalar operations on the input buffer - * @tparam math_t data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param out the output buffer - * @param in the input buffer - * @param scalar the scalar used in the operations - * @param len number of elements in the input buffer - * @param stream cuda stream where to launch work - * @{ + * DISCLAIMER: this file is deprecated: use power.cuh instead */ -template -void powerScalar(math_t* out, const math_t* in, math_t scalar, IdxType len, cudaStream_t stream) -{ - raft::linalg::unaryOp( - out, in, len, [scalar] __device__(math_t in) { return raft::myPow(in, scalar); }, stream); -} -/** @} */ -/** - * @defgroup BinaryOps Element-wise binary operations on the input buffers - * @tparam math_t data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param out the output buffer - * @param in1 the first input buffer - * @param in2 the second input buffer - * @param len number of elements in the input buffers - * @param stream cuda stream where to launch work - * @{ - */ -template -void power(math_t* out, const math_t* in1, const math_t* in2, IdxType len, cudaStream_t stream) -{ - raft::linalg::binaryOp( - out, in1, in2, len, [] __device__(math_t a, math_t b) { return raft::myPow(a, b); }, stream); -} -/** @} */ +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma 
message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "power.cuh" diff --git a/cpp/include/raft/linalg/reduce.hpp b/cpp/include/raft/linalg/reduce.hpp index b9cc2c6e9d..b965cfac7b 100644 --- a/cpp/include/raft/linalg/reduce.hpp +++ b/cpp/include/raft/linalg/reduce.hpp @@ -18,69 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __REDUCE_H -#define __REDUCE_H - -#pragma once - -#include "detail/reduce.cuh" - -namespace raft { -namespace linalg { - /** - * @brief Compute reduction of the input matrix along the requested dimension - * - * @tparam InType the data type of the input - * @tparam OutType the data type of the output (as well as the data type for - * which reduction is performed) - * @tparam IdxType data type of the indices of the array - * @tparam MainLambda Unary lambda applied while acculumation (eg: L1 or L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*MainLambda)(InType, IdxType);
- * @tparam ReduceLambda Binary lambda applied for reduction (eg: addition(+) for L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*ReduceLambda)(OutType);
- * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*FinalLambda)(OutType);
- * @param dots the output reduction vector - * @param data the input matrix - * @param D number of columns - * @param N number of rows - * @param init initial value to use for the reduction - * @param rowMajor input matrix is row-major or not - * @param alongRows whether to reduce along rows or columns - * @param stream cuda stream where to launch work - * @param inplace reduction result added inplace or overwrites old values? - * @param main_op elementwise operation to apply before reduction - * @param reduce_op binary reduction operation - * @param final_op elementwise operation to apply before storing results + * DISCLAIMER: this file is deprecated: use reduce.cuh instead */ -template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> -void reduce(OutType* dots, - const InType* data, - int D, - int N, - OutType init, - bool rowMajor, - bool alongRows, - cudaStream_t stream, - bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) -{ - detail::reduce( - dots, data, D, N, init, rowMajor, alongRows, stream, inplace, main_op, reduce_op, final_op); -} -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "reduce.cuh" diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.hpp b/cpp/include/raft/linalg/reduce_cols_by_key.hpp index c24baa60de..70851c2b69 100644 --- a/cpp/include/raft/linalg/reduce_cols_by_key.hpp +++ b/cpp/include/raft/linalg/reduce_cols_by_key.hpp @@ -18,45 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __REDUCE_COLS_BY_KEY -#define __REDUCE_COLS_BY_KEY +/** + * DISCLAIMER: this file is deprecated: use reduce_cols_by_key.cuh instead + */ #pragma once -#include - -namespace raft { -namespace linalg { +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -/** - * @brief Computes the sum-reduction of matrix columns for each given key - * @tparam T the input data type (as well as the output reduced matrix) - * @tparam KeyType data type of the keys - * @tparam IdxType indexing arithmetic type - * @param data the input data (dim = nrows x ncols). This is assumed to be in - * row-major layout - * @param keys keys array (len = ncols). It is assumed that each key in this - * array is between [0, nkeys). In case this is not true, the caller is expected - * to have called make_monotonic primitive to prepare such a contiguous and - * monotonically increasing keys array. - * @param out the output reduced matrix along columns (dim = nrows x nkeys). - * This will be assumed to be in row-major layout - * @param nrows number of rows in the input data - * @param ncols number of colums in the input data - * @param nkeys number of unique keys in the keys array - * @param stream cuda stream to launch the kernel onto - */ -template -void reduce_cols_by_key(const T* data, - const KeyIteratorT keys, - T* out, - IdxType nrows, - IdxType ncols, - IdxType nkeys, - cudaStream_t stream) -{ - detail::reduce_cols_by_key(data, keys, out, nrows, ncols, nkeys, stream); -} -}; // end namespace linalg -}; // end namespace raft -#endif \ No newline at end of file +#include "reduce_cols_by_key.cuh" diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.hpp b/cpp/include/raft/linalg/reduce_rows_by_key.hpp index d18a00aa1d..4b5e76ea8f 100644 --- a/cpp/include/raft/linalg/reduce_rows_by_key.hpp +++ b/cpp/include/raft/linalg/reduce_rows_by_key.hpp @@ -18,102 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __REDUCE_ROWS_BY_KEY -#define __REDUCE_ROWS_BY_KEY - -#pragma once - -#include - -namespace raft { -namespace linalg { - -/** - Small helper function to convert from int->char and char->int - Transform ncols*nrows read of int in 2*nrows reads of int + ncols*rows reads of chars -**/ -template -void convert_array(IteratorT1 dst, IteratorT2 src, int n, cudaStream_t st) -{ - detail::convert_array(dst, src, n, st); -} - /** - * @brief Computes the weighted reduction of matrix rows for each given key - * - * @tparam DataIteratorT Random-access iterator type, for reading input matrix - * (may be a simple pointer type) - * @tparam KeysIteratorT Random-access iterator type, for reading input keys - * (may be a simple pointer type) - * - * @param[in] d_A Input data array (lda x nrows) - * @param[in] lda Real row size for input data, d_A - * @param[in] d_keys Keys for each row (1 x nrows) - * @param[in] d_weights Weights for each observation in d_A (1 x nrows) - * @param[out] d_keys_char Scratch memory for conversion of keys to char - * @param[in] nrows Number of rows in d_A and d_keys - * @param[in] ncols Number of data columns in d_A - * @param[in] nkeys Number of unique keys in d_keys - * @param[out] d_sums Row sums by key (ncols x d_keys) - * @param[in] stream CUDA stream + * DISCLAIMER: this file is deprecated: use reduce_rows_by_key.cuh instead */ -template -void reduce_rows_by_key(const DataIteratorT d_A, - int lda, - const KeysIteratorT d_keys, - const WeightT* d_weights, - char* d_keys_char, - int nrows, - int ncols, - int nkeys, - DataIteratorT d_sums, - cudaStream_t stream) -{ - detail::reduce_rows_by_key( - d_A, lda, d_keys, d_weights, d_keys_char, nrows, ncols, nkeys, d_sums, stream); -} -/** - * @brief Computes the reduction of matrix rows for each given key - * @tparam DataIteratorT Random-access iterator type, for reading input matrix (may be a simple - * pointer type) - * @tparam KeysIteratorT Random-access iterator type, for reading input keys 
(may be a simple - * pointer type) - * @param[in] d_A Input data array (lda x nrows) - * @param[in] lda Real row size for input data, d_A - * @param[in] d_keys Keys for each row (1 x nrows) - * @param d_keys_char Scratch memory for conversion of keys to char - * @param[in] nrows Number of rows in d_A and d_keys - * @param[in] ncols Number of data columns in d_A - * @param[in] nkeys Number of unique keys in d_keys - * @param[out] d_sums Row sums by key (ncols x d_keys) - * @param[in] stream CUDA stream - */ -template -void reduce_rows_by_key(const DataIteratorT d_A, - int lda, - const KeysIteratorT d_keys, - char* d_keys_char, - int nrows, - int ncols, - int nkeys, - DataIteratorT d_sums, - cudaStream_t stream) -{ - typedef typename std::iterator_traits::value_type DataType; - reduce_rows_by_key(d_A, - lda, - d_keys, - static_cast(nullptr), - d_keys_char, - nrows, - ncols, - nkeys, - d_sums, - stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "reduce_rows_by_key.cuh" diff --git a/cpp/include/raft/linalg/rsvd.hpp b/cpp/include/raft/linalg/rsvd.hpp index ac6e13b555..7e2fffba75 100644 --- a/cpp/include/raft/linalg/rsvd.hpp +++ b/cpp/include/raft/linalg/rsvd.hpp @@ -18,131 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __RSVD_H -#define __RSVD_H - -#pragma once - -#include - -namespace raft { -namespace linalg { - /** - * @brief randomized singular value decomposition (RSVD) on the column major - * float type input matrix (Jacobi-based), by specifying no. 
of PCs and - * upsamples directly - * @param handle: raft handle - * @param M: input matrix - * @param n_rows: number rows of input matrix - * @param n_cols: number columns of input matrix - * @param S_vec: singular values of input matrix - * @param U: left singular values of input matrix - * @param V: right singular values of input matrix - * @param k: no. of singular values to be computed - * @param p: no. of upsamples - * @param use_bbt: whether use eigen decomposition in computation or not - * @param gen_left_vec: left vector needs to be generated or not? - * @param gen_right_vec: right vector needs to be generated or not? - * @param use_jacobi: whether to jacobi solver for decomposition - * @param tol: tolerance for Jacobi-based solvers - * @param max_sweeps: maximum number of sweeps for Jacobi-based solvers - * @param stream cuda stream + * DISCLAIMER: this file is deprecated: use rsvd.cuh instead */ -template -void rsvdFixedRank(const raft::handle_t& handle, - math_t* M, - int n_rows, - int n_cols, - math_t* S_vec, - math_t* U, - math_t* V, - int k, - int p, - bool use_bbt, - bool gen_left_vec, - bool gen_right_vec, - bool use_jacobi, - math_t tol, - int max_sweeps, - cudaStream_t stream) -{ - detail::rsvdFixedRank(handle, - M, - n_rows, - n_cols, - S_vec, - U, - V, - k, - p, - use_bbt, - gen_left_vec, - gen_right_vec, - use_jacobi, - tol, - max_sweeps, - stream); -} -/** - * @brief randomized singular value decomposition (RSVD) on the column major - * float type input matrix (Jacobi-based), by specifying the PC and upsampling - * ratio - * @param handle: raft handle - * @param M: input matrix - * @param n_rows: number rows of input matrix - * @param n_cols: number columns of input matrix - * @param S_vec: singular values of input matrix - * @param U: left singular values of input matrix - * @param V: right singular values of input matrix - * @param PC_perc: percentage of singular values to be computed - * @param UpS_perc: upsampling percentage - * @param 
use_bbt: whether use eigen decomposition in computation or not - * @param gen_left_vec: left vector needs to be generated or not? - * @param gen_right_vec: right vector needs to be generated or not? - * @param use_jacobi: whether to jacobi solver for decomposition - * @param tol: tolerance for Jacobi-based solvers - * @param max_sweeps: maximum number of sweeps for Jacobi-based solvers - * @param stream cuda stream - */ -template -void rsvdPerc(const raft::handle_t& handle, - math_t* M, - int n_rows, - int n_cols, - math_t* S_vec, - math_t* U, - math_t* V, - math_t PC_perc, - math_t UpS_perc, - bool use_bbt, - bool gen_left_vec, - bool gen_right_vec, - bool use_jacobi, - math_t tol, - int max_sweeps, - cudaStream_t stream) -{ - detail::rsvdPerc(handle, - M, - n_rows, - n_cols, - S_vec, - U, - V, - PC_perc, - UpS_perc, - use_bbt, - gen_left_vec, - gen_right_vec, - use_jacobi, - tol, - max_sweeps, - stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "rsvd.cuh" diff --git a/cpp/include/raft/linalg/sqrt.hpp b/cpp/include/raft/linalg/sqrt.hpp index 9c66ee2d14..e0f77f0ab9 100644 --- a/cpp/include/raft/linalg/sqrt.hpp +++ b/cpp/include/raft/linalg/sqrt.hpp @@ -18,36 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __SQRT_H -#define __SQRT_H - -#pragma once - -#include -#include - -namespace raft { -namespace linalg { - /** - * @defgroup ScalarOps Scalar operations on the input buffer - * @tparam math_t data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param out the output buffer - * @param in the input buffer - * @param len number of elements in the input buffer - * @param stream cuda stream where to launch work - * @{ + * DISCLAIMER: this file is deprecated: use sqrt.cuh instead */ -template -void sqrt(math_t* out, const math_t* in, IdxType len, cudaStream_t stream) -{ - raft::linalg::unaryOp( - out, in, len, [] __device__(math_t in) { return raft::mySqrt(in); }, stream); -} -/** @} */ -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "sqrt.cuh" diff --git a/cpp/include/raft/linalg/strided_reduction.hpp b/cpp/include/raft/linalg/strided_reduction.hpp index 3b1597dfc3..6720a302ea 100644 --- a/cpp/include/raft/linalg/strided_reduction.hpp +++ b/cpp/include/raft/linalg/strided_reduction.hpp @@ -18,64 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __STRIDED_REDUCTION_H -#define __STRIDED_REDUCTION_H - -#pragma once - -#include "detail/strided_reduction.cuh" - -namespace raft { -namespace linalg { - /** - * @brief Compute reduction of the input matrix along the strided dimension - * - * @tparam InType the data type of the input - * @tparam OutType the data type of the output (as well as the data type for - * which reduction is performed) - * @tparam IdxType data type of the indices of the array - * @tparam MainLambda Unary lambda applied while acculumation (eg: L1 or L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*MainLambda)(InType, IdxType);
- * @tparam ReduceLambda Binary lambda applied for reduction (eg: addition(+) for L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*ReduceLambda)(OutType);
- * @tparam FinalLambda the final lambda applied before STG (eg: Sqrt for L2 norm) - * It must be a 'callable' supporting the following input and output: - *
OutType (*FinalLambda)(OutType);
- * @param dots the output reduction vector - * @param data the input matrix - * @param D leading dimension of data - * @param N second dimension data - * @param init initial value to use for the reduction - * @param main_op elementwise operation to apply before reduction - * @param reduce_op binary reduction operation - * @param final_op elementwise operation to apply before storing results - * @param inplace reduction result added inplace or overwrites old values? - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use strided_reduction.cuh instead */ -template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> -void stridedReduction(OutType* dots, - const InType* data, - IdxType D, - IdxType N, - OutType init, - cudaStream_t stream, - bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) -{ - detail::stridedReduction(dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op); -} -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "strided_reduction.cuh" diff --git a/cpp/include/raft/linalg/subtract.hpp b/cpp/include/raft/linalg/subtract.hpp index accf57a939..b0c6508ffe 100644 --- a/cpp/include/raft/linalg/subtract.hpp +++ b/cpp/include/raft/linalg/subtract.hpp @@ -18,77 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SUBTRACT_H -#define __SUBTRACT_H - -#pragma once - -#include "detail/subtract.cuh" - -namespace raft { -namespace linalg { - /** - * @brief Elementwise scalar subtraction operation on the input buffer - * - * @tparam InT input data-type. 
Also the data-type upon which the math ops - * will be performed - * @tparam OutT output data-type - * @tparam IdxType Integer type used to for addressing - * - * @param out the output buffer - * @param in the input buffer - * @param scalar the scalar used in the operations - * @param len number of elements in the input buffer - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use subtract.cuh instead */ -template -void subtractScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream) -{ - detail::subtractScalar(out, in, scalar, len, stream); -} -/** - * @brief Elementwise subtraction operation on the input buffers - * @tparam InT input data-type. Also the data-type upon which the math ops - * will be performed - * @tparam OutT output data-type - * @tparam IdxType Integer type used to for addressing - * - * @param out the output buffer - * @param in1 the first input buffer - * @param in2 the second input buffer - * @param len number of elements in the input buffers - * @param stream cuda stream where to launch work - */ -template -void subtract(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t stream) -{ - detail::subtract(out, in1, in2, len, stream); -} - -/** Substract single value pointed by singleScalarDev parameter in device memory from inDev[i] and - * write result to outDev[i] - * @tparam math_t data-type upon which the math operation will be performed - * @tparam IdxType Integer type used to for addressing - * @param outDev the output buffer - * @param inDev the input buffer - * @param singleScalarDev pointer to the scalar located in device memory - * @param len number of elements in the input and output buffer - * @param stream cuda stream - * @remark block size has not been tuned - */ -template -void subtractDevScalar(math_t* outDev, - const math_t* inDev, - const math_t* singleScalarDev, - IdxType len, - cudaStream_t stream) -{ - detail::subtractDevScalar(outDev, inDev, 
singleScalarDev, len, stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "subtract.cuh" diff --git a/cpp/include/raft/linalg/svd.hpp b/cpp/include/raft/linalg/svd.hpp index 01788a4188..26bce80388 100644 --- a/cpp/include/raft/linalg/svd.hpp +++ b/cpp/include/raft/linalg/svd.hpp @@ -18,176 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SVD_H -#define __SVD_H - -#pragma once - -#include "detail/svd.cuh" - -namespace raft { -namespace linalg { - /** - * @brief singular value decomposition (SVD) on the column major float type - * input matrix using QR method - * @param handle: raft handle - * @param in: input matrix - * @param n_rows: number rows of input matrix - * @param n_cols: number columns of input matrix - * @param sing_vals: singular values of input matrix - * @param left_sing_vecs: left singular values of input matrix - * @param right_sing_vecs: right singular values of input matrix - * @param trans_right: transpose right vectors or not - * @param gen_left_vec: generate left eig vector. Not activated. - * @param gen_right_vec: generate right eig vector. Not activated. - * @param stream cuda stream + * DISCLAIMER: this file is deprecated: use svd.cuh instead */ -// TODO: activate gen_left_vec and gen_right_vec options -// TODO: couldn't template this function due to cusolverDnSgesvd and -// cusolverSnSgesvd. Check if there is any other way. 
-template -void svdQR(const raft::handle_t& handle, - T* in, - int n_rows, - int n_cols, - T* sing_vals, - T* left_sing_vecs, - T* right_sing_vecs, - bool trans_right, - bool gen_left_vec, - bool gen_right_vec, - cudaStream_t stream) -{ - detail::svdQR(handle, - in, - n_rows, - n_cols, - sing_vals, - left_sing_vecs, - right_sing_vecs, - trans_right, - gen_left_vec, - gen_right_vec, - stream); -} - -template -void svdEig(const raft::handle_t& handle, - T* in, - int n_rows, - int n_cols, - T* S, - T* U, - T* V, - bool gen_left_vec, - cudaStream_t stream) -{ - detail::svdEig(handle, in, n_rows, n_cols, S, U, V, gen_left_vec, stream); -} -/** - * @brief on the column major input matrix using Jacobi method - * @param handle: raft handle - * @param in: input matrix - * @param n_rows: number rows of input matrix - * @param n_cols: number columns of input matrix - * @param sing_vals: singular values of input matrix - * @param left_sing_vecs: left singular vectors of input matrix - * @param right_sing_vecs: right singular vectors of input matrix - * @param gen_left_vec: generate left eig vector. Not activated. - * @param gen_right_vec: generate right eig vector. Not activated. - * @param tol: error tolerance for the jacobi method. Algorithm stops when the - * error is below tol - * @param max_sweeps: number of sweeps in the Jacobi algorithm. The more the better - * accuracy. 
- * @param stream cuda stream - */ -template -void svdJacobi(const raft::handle_t& handle, - math_t* in, - int n_rows, - int n_cols, - math_t* sing_vals, - math_t* left_sing_vecs, - math_t* right_sing_vecs, - bool gen_left_vec, - bool gen_right_vec, - math_t tol, - int max_sweeps, - cudaStream_t stream) -{ - detail::svdJacobi(handle, - in, - n_rows, - n_cols, - sing_vals, - left_sing_vecs, - right_sing_vecs, - gen_left_vec, - gen_right_vec, - tol, - max_sweeps, - stream); -} - -/** - * @brief reconstruct a matrix use left and right singular vectors and - * singular values - * @param handle: raft handle - * @param U: left singular vectors of size n_rows x k - * @param S: square matrix with singular values on its diagonal, k x k - * @param V: right singular vectors of size n_cols x k - * @param out: reconstructed matrix to be returned - * @param n_rows: number rows of output matrix - * @param n_cols: number columns of output matrix - * @param k: number of singular values - * @param stream cuda stream - */ -template -void svdReconstruction(const raft::handle_t& handle, - math_t* U, - math_t* S, - math_t* V, - math_t* out, - int n_rows, - int n_cols, - int k, - cudaStream_t stream) -{ - detail::svdReconstruction(handle, U, S, V, out, n_rows, n_cols, k, stream); -} - -/** - * @brief reconstruct a matrix use left and right singular vectors and - * singular values - * @param handle: raft handle - * @param A_d: input matrix - * @param U: left singular vectors of size n_rows x k - * @param S_vec: singular values as a vector - * @param V: right singular vectors of size n_cols x k - * @param n_rows: number rows of output matrix - * @param n_cols: number columns of output matrix - * @param k: number of singular values to be computed, 1.0 for normal SVD - * @param tol: tolerance for the evaluation - * @param stream cuda stream - */ -template -bool evaluateSVDByL2Norm(const raft::handle_t& handle, - math_t* A_d, - math_t* U, - math_t* S_vec, - math_t* V, - int n_rows, - int 
n_cols, - int k, - math_t tol, - cudaStream_t stream) -{ - return detail::evaluateSVDByL2Norm(handle, A_d, U, S_vec, V, n_rows, n_cols, k, tol, stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "svd.cuh" diff --git a/cpp/include/raft/linalg/ternary_op.hpp b/cpp/include/raft/linalg/ternary_op.hpp index bce9eacb11..58dab89609 100644 --- a/cpp/include/raft/linalg/ternary_op.hpp +++ b/cpp/include/raft/linalg/ternary_op.hpp @@ -18,42 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __TERNARY_OP_H -#define __TERNARY_OP_H - -#pragma once - -#include - -namespace raft { -namespace linalg { /** - * @brief perform element-wise ternary operation on the input arrays - * @tparam math_t data-type upon which the math operation will be performed - * @tparam Lambda the device-lambda performing the actual operation - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads-per-block in the final kernel launched - * @param out the output array - * @param in1 the first input array - * @param in2 the second input array - * @param in3 the third input array - * @param len number of elements in the input array - * @param op the device-lambda - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use ternary_op.cuh instead */ -template -void ternaryOp(math_t* out, - const math_t* in1, - const math_t* in2, - const math_t* in3, - IdxType len, - Lambda op, - cudaStream_t stream) -{ - detail::ternaryOp(out, in1, in2, in3, len, op, stream); -} -}; // end namespace linalg -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "ternary_op.cuh" diff --git a/cpp/include/raft/linalg/transpose.hpp b/cpp/include/raft/linalg/transpose.hpp index caa6bafedf..4c3f9224e4 100644 --- a/cpp/include/raft/linalg/transpose.hpp +++ b/cpp/include/raft/linalg/transpose.hpp @@ -18,49 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __TRANSPOSE_H -#define __TRANSPOSE_H - -#pragma once - -#include "detail/transpose.cuh" - -namespace raft { -namespace linalg { - /** - * @brief transpose on the column major input matrix using Jacobi method - * @param handle: raft handle - * @param in: input matrix - * @param out: output. Transposed input matrix - * @param n_rows: number rows of input matrix - * @param n_cols: number columns of input matrix - * @param stream: cuda stream + * DISCLAIMER: this file is deprecated: use transpose.cuh instead */ -template -void transpose(const raft::handle_t& handle, - math_t* in, - math_t* out, - int n_rows, - int n_cols, - cudaStream_t stream) -{ - detail::transpose(handle, in, out, n_rows, n_cols, stream); -} -/** - * @brief transpose on the column major input matrix using Jacobi method - * @param inout: input and output matrix - * @param n: number of rows and columns of input matrix - * @param stream: cuda stream - */ -template -void transpose(math_t* inout, int n, cudaStream_t stream) -{ - detail::transpose(inout, n, stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "transpose.cuh" diff --git a/cpp/include/raft/linalg/unary_op.hpp b/cpp/include/raft/linalg/unary_op.hpp index ca1e3f9875..2ace126ff1 100644 --- a/cpp/include/raft/linalg/unary_op.hpp +++ b/cpp/include/raft/linalg/unary_op.hpp @@ -18,65 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __UNARY_OP_H -#define __UNARY_OP_H - -#pragma once - -#include "detail/unary_op.cuh" - -namespace raft { -namespace linalg { - /** - * @brief perform element-wise unary operation in the input array - * @tparam InType input data-type - * @tparam Lambda the device-lambda performing the actual operation - * @tparam OutType output data-type - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads-per-block in the final kernel launched - * @param out the output array - * @param in the input array - * @param len number of elements in the input array - * @param op the device-lambda - * @param stream cuda stream where to launch work - * @note Lambda must be a functor with the following signature: - * `OutType func(const InType& val);` + * DISCLAIMER: this file is deprecated: use unary_op.cuh instead */ -template -void unaryOp(OutType* out, const InType* in, IdxType len, Lambda op, cudaStream_t stream) -{ - detail::unaryOpCaller(out, in, len, op, stream); -} -/** - * @brief Perform an element-wise unary operation into the output array - * - * Compared to `unaryOp()`, this method does not do any reads from any inputs - * - * @tparam OutType output data-type - * @tparam Lambda the device-lambda performing the actual operation - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads-per-block in the final kernel launched - * - * @param[out] out the output array [on device] [len = len] - * @param[in] len number of elements in the input array - * @param[in] op the device-lambda which must be of the form: - * `void func(OutType* outLocationOffset, IdxType idx);` - * where outLocationOffset will be out + idx. 
- * @param[in] stream cuda stream where to launch work - */ -template -void writeOnlyUnaryOp(OutType* out, IdxType len, Lambda op, cudaStream_t stream) -{ - detail::writeOnlyUnaryOpCaller(out, len, op, stream); -} +#pragma once -}; // end namespace linalg -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif +#include "unary_op.cuh" diff --git a/cpp/include/raft/matrix/col_wise_sort.hpp b/cpp/include/raft/matrix/col_wise_sort.hpp index 83a8738219..60c36db9e2 100644 --- a/cpp/include/raft/matrix/col_wise_sort.hpp +++ b/cpp/include/raft/matrix/col_wise_sort.hpp @@ -18,44 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __COL_WISE_SORT_H -#define __COL_WISE_SORT_H +/** + * DISCLAIMER: this file is deprecated: use col_wise_sort.cuh instead + */ #pragma once -#include - -namespace raft { -namespace matrix { - -/** - * @brief sort columns within each row of row-major input matrix and return sorted indexes - * modelled as key-value sort with key being input matrix and value being index of values - * @param in: input matrix - * @param out: output value(index) matrix - * @param n_rows: number rows of input matrix - * @param n_columns: number columns of input matrix - * @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize - * @param workspacePtr: pointer to workspace memory - * @param workspaceSize: Size of workspace to be allocated - * @param stream: cuda stream to execute prim on - * @param sortedKeys: Optional, output matrix for sorted keys (input) - */ -template -void sort_cols_per_row(const InType* in, - OutType* out, - int n_rows, - int n_columns, - bool& bAllocWorkspace, - void* workspacePtr, - size_t& workspaceSize, - cudaStream_t stream, - InType* sortedKeys = nullptr) -{ - detail::sortColumnsPerRow( - in, out, n_rows, n_columns, bAllocWorkspace, workspacePtr, workspaceSize, stream, sortedKeys); -} -}; 
// end namespace matrix -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "col_wise_sort.cuh" diff --git a/cpp/include/raft/matrix/matrix.hpp b/cpp/include/raft/matrix/matrix.hpp index 7409140d7c..428c914784 100644 --- a/cpp/include/raft/matrix/matrix.hpp +++ b/cpp/include/raft/matrix/matrix.hpp @@ -18,265 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __MATRIX_H -#define __MATRIX_H - -#pragma once - -#include "detail/linewise_op.cuh" -#include "detail/matrix.cuh" - -#include - -namespace raft { -namespace matrix { - -using namespace std; - -/** - * @brief Copy selected rows of the input matrix into contiguous space. - * - * On exit out[i + k*n_rows] = in[indices[i] + k*n_rows], - * where i = 0..n_rows_indices-1, and k = 0..n_cols-1. - * - * @param in input matrix - * @param n_rows number of rows of output matrix - * @param n_cols number of columns of output matrix - * @param out output matrix - * @param indices of the rows to be copied - * @param n_rows_indices number of rows to copy - * @param stream cuda stream - * @param rowMajor whether the matrix has row major layout - */ -template -void copyRows(const m_t* in, - idx_t n_rows, - idx_t n_cols, - m_t* out, - const idx_array_t* indices, - idx_t n_rows_indices, - cudaStream_t stream, - bool rowMajor = false) -{ - detail::copyRows(in, n_rows, n_cols, out, indices, n_rows_indices, stream, rowMajor); -} - -/** - * @brief copy matrix operation for column major matrices. 
- * @param in: input matrix - * @param out: output matrix - * @param n_rows: number of rows of output matrix - * @param n_cols: number of columns of output matrix - * @param stream: cuda stream - */ -template -void copy(const m_t* in, m_t* out, idx_t n_rows, idx_t n_cols, cudaStream_t stream) -{ - raft::copy_async(out, in, n_rows * n_cols, stream); -} - -/** - * @brief copy matrix operation for column major matrices. First n_rows and - * n_cols of input matrix "in" is copied to "out" matrix. - * @param in: input matrix - * @param in_n_rows: number of rows of input matrix - * @param out: output matrix - * @param out_n_rows: number of rows of output matrix - * @param out_n_cols: number of columns of output matrix - * @param stream: cuda stream - */ -template -void truncZeroOrigin( - m_t* in, idx_t in_n_rows, m_t* out, idx_t out_n_rows, idx_t out_n_cols, cudaStream_t stream) -{ - detail::truncZeroOrigin(in, in_n_rows, out, out_n_rows, out_n_cols, stream); -} - -/** - * @brief Columns of a column major matrix is reversed (i.e. first column and - * last column are swapped) - * @param inout: input and output matrix - * @param n_rows: number of rows of input matrix - * @param n_cols: number of columns of input matrix - * @param stream: cuda stream - */ -template -void colReverse(m_t* inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) -{ - detail::colReverse(inout, n_rows, n_cols, stream); -} - -/** - * @brief Rows of a column major matrix is reversed (i.e. 
first row and last - * row are swapped) - * @param inout: input and output matrix - * @param n_rows: number of rows of input matrix - * @param n_cols: number of columns of input matrix - * @param stream: cuda stream - */ -template -void rowReverse(m_t* inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) -{ - detail::rowReverse(inout, n_rows, n_cols, stream); -} - -/** - * @brief Prints the data stored in GPU memory - * @param in: input matrix - * @param n_rows: number of rows of input matrix - * @param n_cols: number of columns of input matrix - * @param h_separator: horizontal separator character - * @param v_separator: vertical separator character - * @param stream: cuda stream - */ -template -void print(const m_t* in, - idx_t n_rows, - idx_t n_cols, - char h_separator = ' ', - char v_separator = '\n', - cudaStream_t stream = rmm::cuda_stream_default) -{ - detail::print(in, n_rows, n_cols, h_separator, v_separator, stream); -} - -/** - * @brief Prints the data stored in CPU memory - * @param in: input matrix - * @param n_rows: number of rows of input matrix - * @param n_cols: number of columns of input matrix - */ -template -void printHost(const m_t* in, idx_t n_rows, idx_t n_cols) -{ - detail::printHost(in, n_rows, n_cols); -} - -/** - * @brief Slice a matrix (in-place) - * @param in: input matrix - * @param n_rows: number of rows of input matrix - * @param n_cols: number of columns of input matrix - * @param out: output matrix - * @param x1, y1: coordinate of the top-left point of the wanted area (0-based) - * @param x2, y2: coordinate of the bottom-right point of the wanted area - * (1-based) - * example: Slice the 2nd and 3rd columns of a 4x3 matrix: slice_matrix(M_d, 4, - * 3, 0, 1, 4, 3); - * @param stream: cuda stream - */ -template -void sliceMatrix(m_t* in, - idx_t n_rows, - idx_t n_cols, - m_t* out, - idx_t x1, - idx_t y1, - idx_t x2, - idx_t y2, - cudaStream_t stream) -{ - detail::sliceMatrix(in, n_rows, n_cols, out, x1, y1, x2, y2, stream); -} - 
/** - * @brief Copy the upper triangular part of a matrix to another - * @param src: input matrix with a size of n_rows x n_cols - * @param dst: output matrix with a size of kxk, k = min(n_rows, n_cols) - * @param n_rows: number of rows of input matrix - * @param n_cols: number of columns of input matrix - * @param stream: cuda stream + * DISCLAIMER: this file is deprecated: use matrix.cuh instead */ -template -void copyUpperTriangular(m_t* src, m_t* dst, idx_t n_rows, idx_t n_cols, cudaStream_t stream) -{ - detail::copyUpperTriangular(src, dst, n_rows, n_cols, stream); -} -/** - * @brief Initialize a diagonal matrix with a vector - * @param vec: vector of length k = min(n_rows, n_cols) - * @param matrix: matrix of size n_rows x n_cols - * @param n_rows: number of rows of the matrix - * @param n_cols: number of columns of the matrix - * @param stream: cuda stream - */ -template -void initializeDiagonalMatrix( - m_t* vec, m_t* matrix, idx_t n_rows, idx_t n_cols, cudaStream_t stream) -{ - detail::initializeDiagonalMatrix(vec, matrix, n_rows, n_cols, stream); -} - -/** - * @brief Get a square matrix with elements on diagonal reversed (in-place) - * @param in: square input matrix with size len x len - * @param len: size of one side of the matrix - * @param stream: cuda stream - */ -template -void getDiagonalInverseMatrix(m_t* in, idx_t len, cudaStream_t stream) -{ - detail::getDiagonalInverseMatrix(in, len, stream); -} - -/** - * @brief Get the L2/F-norm of a matrix/vector - * @param handle - * @param in: input matrix/vector with totally size elements - * @param size: size of the matrix/vector - * @param stream: cuda stream - */ -template -m_t getL2Norm(const raft::handle_t& handle, m_t* in, idx_t size, cudaStream_t stream) -{ - return detail::getL2Norm(handle, in, size, stream); -} - -/** - * Run a function over matrix lines (rows or columns) with a variable number - * row-vectors or column-vectors. 
- * The term `line` here signifies that the lines can be either columns or rows, - * depending on the matrix layout. - * What matters is if the vectors are applied along lines (indices of vectors correspond to - * indices within lines), or across lines (indices of vectors correspond to line numbers). - * - * @param [out] out result of the operation; can be same as `in`; should be aligned the same - * as `in` to allow faster vectorized memory transfers. - * @param [in] in input matrix consisting of `nLines` lines, each `lineLen`-long. - * @param [in] lineLen length of matrix line in elements (`=nCols` in row-major or `=nRows` in - * col-major) - * @param [in] nLines number of matrix lines (`=nRows` in row-major or `=nCols` in col-major) - * @param [in] alongLines whether vectors are indices along or across lines. - * @param [in] op the operation applied on each line: - * for i in [0..lineLen) and j in [0..nLines): - * out[i, j] = op(in[i, j], vec1[i], vec2[i], ... veck[i]) if alongLines = true - * out[i, j] = op(in[i, j], vec1[j], vec2[j], ... veck[j]) if alongLines = false - * where matrix indexing is row-major ([i, j] = [i + lineLen * j]). - * @param [in] stream a cuda stream for the kernels - * @param [in] vecs zero or more vectors to be passed as arguments, - * size of each vector is `alongLines ? lineLen : nLines`. - */ -template -void linewiseOp(m_t* out, - const m_t* in, - const idx_t lineLen, - const idx_t nLines, - const bool alongLines, - Lambda op, - cudaStream_t stream, - Vecs... vecs) -{ - common::nvtx::range fun_scope("linewiseOp-%c-%zu (%zu, %zu)", - alongLines ? 'l' : 'x', - sizeof...(Vecs), - size_t(lineLen), - size_t(nLines)); - detail::MatrixLinewiseOp<16, 256>::run( - out, in, lineLen, nLines, alongLines, op, stream, vecs...); -} +#pragma once -}; // end namespace matrix -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "matrix.cuh" diff --git a/cpp/include/raft/random/make_regression.hpp b/cpp/include/raft/random/make_regression.hpp index 4f6b2717f6..f3e2113f80 100644 --- a/cpp/include/raft/random/make_regression.hpp +++ b/cpp/include/raft/random/make_regression.hpp @@ -13,98 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -/* Adapted from scikit-learn - * https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/datasets/_samples_generator.py - */ - /** * This file is deprecated and will be removed in release 22.06. * Please use the cuh version instead. */ -#ifndef __MAKE_REGRESSION_H -#define __MAKE_REGRESSION_H - -#pragma once - -#include - -#include "detail/make_regression.cuh" - -namespace raft::random { - /** - * @brief GPU-equivalent of sklearn.datasets.make_regression as documented at: - * https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_regression.html - * - * @tparam DataT Scalar type - * @tparam IdxT Index type - * - * @param[in] handle RAFT handle - * @param[out] out Row-major (samples, features) matrix to store - * the problem data - * @param[out] values Row-major (samples, targets) matrix to store - * the values for the regression problem - * @param[in] n_rows Number of samples - * @param[in] n_cols Number of features - * @param[in] n_informative Number of informative features (non-zero - * coefficients) - * @param[in] stream CUDA stream - * @param[out] coef Row-major (features, targets) matrix to store - * the coefficients used to generate the values - * for the regression problem. If nullptr is - * given, nothing will be written - * @param[in] n_targets Number of targets (generated values per sample) - * @param[in] bias A scalar that will be added to the values - * @param[in] effective_rank The approximate rank of the data matrix (used - * to create correlations in the data). 
-1 is the - * code to use well-conditioned data - * @param[in] tail_strength The relative importance of the fat noisy tail - * of the singular values profile if - * effective_rank is not -1 - * @param[in] noise Standard deviation of the gaussian noise - * applied to the output - * @param[in] shuffle Shuffle the samples and the features - * @param[in] seed Seed for the random number generator - * @param[in] type Random generator type + * DISCLAIMER: this file is deprecated: use make_regression.cuh instead */ -template -void make_regression(const raft::handle_t& handle, - DataT* out, - DataT* values, - IdxT n_rows, - IdxT n_cols, - IdxT n_informative, - cudaStream_t stream, - DataT* coef = nullptr, - IdxT n_targets = (IdxT)1, - DataT bias = (DataT)0.0, - IdxT effective_rank = (IdxT)-1, - DataT tail_strength = (DataT)0.5, - DataT noise = (DataT)0.0, - bool shuffle = true, - uint64_t seed = 0ULL, - GeneratorType type = GenPhilox) -{ - detail::make_regression_caller(handle, - out, - values, - n_rows, - n_cols, - n_informative, - stream, - coef, - n_targets, - bias, - effective_rank, - tail_strength, - noise, - shuffle, - seed, - type); -} -} // namespace raft::random +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "make_regression.cuh" diff --git a/cpp/include/raft/random/multi_variable_gaussian.hpp b/cpp/include/raft/random/multi_variable_gaussian.hpp index 6b85ec6a14..e7d78938a2 100644 --- a/cpp/include/raft/random/multi_variable_gaussian.hpp +++ b/cpp/include/raft/random/multi_variable_gaussian.hpp @@ -18,51 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __MVG_H -#define __MVG_H +/** + * DISCLAIMER: this file is deprecated: use multi_variable_gaussian.cuh instead + */ #pragma once -#include "detail/multi_variable_gaussian.cuh" - -namespace raft::random { - -template -class multi_variable_gaussian : public detail::multi_variable_gaussian_impl { - public: - // using Decomposer = typename detail::multi_variable_gaussian_impl::Decomposer; - // using detail::multi_variable_gaussian_impl::Decomposer::chol_decomp; - // using detail::multi_variable_gaussian_impl::Decomposer::jacobi; - // using detail::multi_variable_gaussian_impl::Decomposer::qr; - - multi_variable_gaussian() = delete; - multi_variable_gaussian(const raft::handle_t& handle, - const int dim, - typename detail::multi_variable_gaussian_impl::Decomposer method) - : detail::multi_variable_gaussian_impl{handle, dim, method} - { - } - - std::size_t get_workspace_size() - { - return detail::multi_variable_gaussian_impl::get_workspace_size(); - } - - void set_workspace(T* workarea) - { - detail::multi_variable_gaussian_impl::set_workspace(workarea); - } - - void give_gaussian(const int nPoints, T* P, T* X, const T* x = 0) - { - detail::multi_variable_gaussian_impl::give_gaussian(nPoints, P, X, x); - } - - void deinit() { detail::multi_variable_gaussian_impl::deinit(); } - - ~multi_variable_gaussian() { deinit(); } -}; // end of multi_variable_gaussian - -}; // end of namespace raft::random +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "multi_variable_gaussian.cuh" diff --git a/cpp/include/raft/random/permute.hpp b/cpp/include/raft/random/permute.hpp index 26e22e403b..a2fafa6574 100644 --- a/cpp/include/raft/random/permute.hpp +++ b/cpp/include/raft/random/permute.hpp @@ -18,50 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __PERMUTE_H -#define __PERMUTE_H - -#pragma once - -#include "detail/permute.cuh" - -namespace raft::random { - /** - * @brief Generate permutations of the input array. Pretty useful primitive for - * shuffling the input datasets in ML algos. See note at the end for some of its - * limitations! - * @tparam Type Data type of the array to be shuffled - * @tparam IntType Integer type used for ther perms array - * @tparam IdxType Integer type used for addressing indices - * @tparam TPB threads per block - * @param perms the output permutation indices. Typically useful only when - * one wants to refer back. If you don't need this, pass a nullptr - * @param out the output shuffled array. Pass nullptr if you don't want this to - * be written. For eg: when you only want the perms array to be filled. - * @param in input array (in-place is not supported due to race conditions!) - * @param D number of columns of the input array - * @param N length of the input array (or number of rows) - * @param rowMajor whether the input/output matrices are row or col major - * @param stream cuda stream where to launch the work - * - * @note This is NOT a uniform permutation generator! In fact, it only generates - * very small percentage of permutations. If your application really requires a - * high quality permutation generator, it is recommended that you pick - * Knuth Shuffle. + * DISCLAIMER: this file is deprecated: use permute.cuh instead */ -template -void permute(IntType* perms, - Type* out, - const Type* in, - IntType D, - IntType N, - bool rowMajor, - cudaStream_t stream) -{ - detail::permute(perms, out, in, D, N, rowMajor, stream); -} -}; // end namespace raft::random +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "permute.cuh" diff --git a/cpp/include/raft/sparse/linalg/add.hpp b/cpp/include/raft/sparse/linalg/add.hpp index 39ab2d6450..e6930eaee7 100644 --- a/cpp/include/raft/sparse/linalg/add.hpp +++ b/cpp/include/raft/sparse/linalg/add.hpp @@ -18,87 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SPARSE_ADD_H -#define __SPARSE_ADD_H - -#pragma once - -#include - -namespace raft { -namespace sparse { -namespace linalg { - /** - * @brief Calculate the CSR row_ind array that would result - * from summing together two CSR matrices - * @param a_ind: left hand row_ind array - * @param a_indptr: left hand index_ptr array - * @param a_val: left hand data array - * @param nnz1: size of left hand index_ptr and val arrays - * @param b_ind: right hand row_ind array - * @param b_indptr: right hand index_ptr array - * @param b_val: right hand data array - * @param nnz2: size of right hand index_ptr and val arrays - * @param m: size of output array (number of rows in final matrix) - * @param out_ind: output row_ind array - * @param stream: cuda stream to use + * DISCLAIMER: this file is deprecated: use add.cuh instead */ -template -size_t csr_add_calc_inds(const int* a_ind, - const int* a_indptr, - const T* a_val, - int nnz1, - const int* b_ind, - const int* b_indptr, - const T* b_val, - int nnz2, - int m, - int* out_ind, - cudaStream_t stream) -{ - return detail::csr_add_calc_inds( - a_ind, a_indptr, a_val, nnz1, b_ind, b_indptr, b_val, nnz2, m, out_ind, stream); -} -/** - * @brief Calculate the CSR row_ind array that would result - * from summing together two CSR matrices - * @param a_ind: left hand row_ind array - * @param a_indptr: left hand index_ptr array - * @param a_val: left hand data array - * @param nnz1: size of left hand index_ptr and val arrays - * @param b_ind: right hand row_ind array - * @param b_indptr: right hand index_ptr array - * @param b_val: right hand data 
array - * @param nnz2: size of right hand index_ptr and val arrays - * @param m: size of output array (number of rows in final matrix) - * @param c_ind: output row_ind array - * @param c_indptr: output ind_ptr array - * @param c_val: output data array - * @param stream: cuda stream to use - */ -template -void csr_add_finalize(const int* a_ind, - const int* a_indptr, - const T* a_val, - int nnz1, - const int* b_ind, - const int* b_indptr, - const T* b_val, - int nnz2, - int m, - int* c_ind, - int* c_indptr, - T* c_val, - cudaStream_t stream) -{ - detail::csr_add_finalize( - a_ind, a_indptr, a_val, nnz1, b_ind, b_indptr, b_val, nnz2, m, c_ind, c_indptr, c_val, stream); -} +#pragma once -}; // end NAMESPACE linalg -}; // end NAMESPACE sparse -}; // end NAMESPACE raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "add.cuh" diff --git a/cpp/include/raft/sparse/linalg/degree.hpp b/cpp/include/raft/sparse/linalg/degree.hpp index 7cece7908e..240cfd452f 100644 --- a/cpp/include/raft/sparse/linalg/degree.hpp +++ b/cpp/include/raft/sparse/linalg/degree.hpp @@ -18,111 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __SPARSE_DEGREE_H -#define __SPARSE_DEGREE_H - -#pragma once - -#include -#include - -namespace raft { -namespace sparse { -namespace linalg { - -/** - * @brief Count the number of values for each row - * @tparam TPB_X: number of threads to use per block - * @param rows: rows array of the COO matrix - * @param nnz: size of the rows array - * @param results: output result array - * @param stream: cuda stream to use - */ -template -void coo_degree(const T* rows, int nnz, T* results, cudaStream_t stream) -{ - detail::coo_degree<64, T>(rows, nnz, results, stream); -} - -/** - * @brief Count the number of values for each row - * @tparam TPB_X: number of threads to use per block - * @tparam T: type name of underlying values array - * @param in: input COO object for counting rows - * @param results: output array with row counts (size=in->n_rows) - * @param stream: cuda stream to use - */ -template -void coo_degree(COO* in, int* results, cudaStream_t stream) -{ - coo_degree(in->rows(), in->nnz, results, stream); -} - -/** - * @brief Count the number of values for each row that doesn't match a particular scalar - * @tparam TPB_X: number of threads to use per block - * @tparam T: the type name of the underlying value arrays - * @param rows: Input COO row array - * @param vals: Input COO val arrays - * @param nnz: size of input COO arrays - * @param scalar: scalar to match for counting rows - * @param results: output row counts - * @param stream: cuda stream to use - */ -template -void coo_degree_scalar( - const int* rows, const T* vals, int nnz, T scalar, int* results, cudaStream_t stream = 0) -{ - detail::coo_degree_scalar<64>(rows, vals, nnz, scalar, results, stream); -} - -/** - * @brief Count the number of values for each row that doesn't match a particular scalar - * @tparam TPB_X: number of threads to use per block - * @tparam T: the type name of the underlying value arrays - * @param in: Input COO array - * @param scalar: scalar to match for counting rows 
- * @param results: output row counts - * @param stream: cuda stream to use - */ -template -void coo_degree_scalar(COO* in, T scalar, int* results, cudaStream_t stream) -{ - coo_degree_scalar(in->rows(), in->vals(), in->nnz, scalar, results, stream); -} - /** - * @brief Count the number of nonzeros for each row - * @tparam TPB_X: number of threads to use per block - * @tparam T: the type name of the underlying value arrays - * @param rows: Input COO row array - * @param vals: Input COO val arrays - * @param nnz: size of input COO arrays - * @param results: output row counts - * @param stream: cuda stream to use + * DISCLAIMER: this file is deprecated: use degree.cuh instead */ -template -void coo_degree_nz(const int* rows, const T* vals, int nnz, int* results, cudaStream_t stream) -{ - detail::coo_degree_nz<64>(rows, vals, nnz, results, stream); -} -/** - * @brief Count the number of nonzero values for each row - * @tparam TPB_X: number of threads to use per block - * @tparam T: the type name of the underlying value arrays - * @param in: Input COO array - * @param results: output row counts - * @param stream: cuda stream to use - */ -template -void coo_degree_nz(COO* in, int* results, cudaStream_t stream) -{ - coo_degree_nz(in->rows(), in->vals(), in->nnz, results, stream); -} +#pragma once -}; // end NAMESPACE linalg -}; // end NAMESPACE sparse -}; // end NAMESPACE raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "degree.cuh" diff --git a/cpp/include/raft/sparse/linalg/norm.hpp b/cpp/include/raft/sparse/linalg/norm.hpp index 1f054e63ab..64261f1178 100644 --- a/cpp/include/raft/sparse/linalg/norm.hpp +++ b/cpp/include/raft/sparse/linalg/norm.hpp @@ -18,61 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __SPARSE_NORM_H -#define __SPARSE_NORM_H - -#pragma once - -#include - -namespace raft { -namespace sparse { -namespace linalg { - /** - * @brief Perform L1 normalization on the rows of a given CSR-formatted sparse matrix - * - * @param ia: row_ind array - * @param vals: data array - * @param nnz: size of data array - * @param m: size of row_ind array - * @param result: l1 normalized data array - * @param stream: cuda stream to use + * DISCLAIMER: this file is deprecated: use norm.cuh instead */ -template -void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) - const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr - T* result, - cudaStream_t stream) -{ // output array - detail::csr_row_normalize_l1(ia, vals, nnz, m, result, stream); -} -/** - * @brief Perform L_inf normalization on a given CSR-formatted sparse matrix - * - * @param ia: row_ind array - * @param vals: data array - * @param nnz: size of data array - * @param m: size of row_ind array - * @param result: l1 normalized data array - * @param stream: cuda stream to use - */ -template -void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) - const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr - T* result, - cudaStream_t stream) -{ - detail::csr_row_normalize_max(ia, vals, nnz, m, result, stream); -} +#pragma once -}; // end NAMESPACE linalg -}; // end NAMESPACE sparse -}; // end NAMESPACE raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "norm.cuh" diff --git a/cpp/include/raft/sparse/linalg/spectral.hpp b/cpp/include/raft/sparse/linalg/spectral.hpp index ff400f1f0f..d7009db03f 100644 --- a/cpp/include/raft/sparse/linalg/spectral.hpp +++ b/cpp/include/raft/sparse/linalg/spectral.hpp @@ -18,31 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SPARSE_SPECTRAL_H -#define __SPARSE_SPECTRAL_H - -#include -#include +/** + * DISCLAIMER: this file is deprecated: use spectral.cuh instead + */ -namespace raft { -namespace sparse { -namespace spectral { +#pragma once -template -void fit_embedding(const raft::handle_t& handle, - int* rows, - int* cols, - T* vals, - int nnz, - int n, - int n_components, - T* out, - unsigned long long seed = 1234567) -{ - detail::fit_embedding(handle, rows, cols, vals, nnz, n, n_components, out, seed); -} -}; // namespace spectral -}; // namespace sparse -}; // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif +#include "spectral.cuh" diff --git a/cpp/include/raft/sparse/linalg/transpose.hpp b/cpp/include/raft/sparse/linalg/transpose.hpp index c709c20473..a6a0539319 100644 --- a/cpp/include/raft/sparse/linalg/transpose.hpp +++ b/cpp/include/raft/sparse/linalg/transpose.hpp @@ -18,62 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __TRANSPOSE_H -#define __TRANSPOSE_H - -#pragma once - -#include -#include - -namespace raft { -namespace sparse { -namespace linalg { - /** - * Transpose a set of CSR arrays into a set of CSC arrays. 
- * @tparam value_idx : data type of the CSR index arrays - * @tparam value_t : data type of the CSR data array - * @param[in] handle : used for invoking cusparse - * @param[in] csr_indptr : CSR row index array - * @param[in] csr_indices : CSR column indices array - * @param[in] csr_data : CSR data array - * @param[out] csc_indptr : CSC row index array - * @param[out] csc_indices : CSC column indices array - * @param[out] csc_data : CSC data array - * @param[in] csr_nrows : Number of rows in CSR - * @param[in] csr_ncols : Number of columns in CSR - * @param[in] nnz : Number of nonzeros of CSR - * @param[in] stream : Cuda stream for ordering events + * DISCLAIMER: this file is deprecated: use transpose.cuh instead */ -template -void csr_transpose(const raft::handle_t& handle, - const value_idx* csr_indptr, - const value_idx* csr_indices, - const value_t* csr_data, - value_idx* csc_indptr, - value_idx* csc_indices, - value_t* csc_data, - value_idx csr_nrows, - value_idx csr_ncols, - value_idx nnz, - cudaStream_t stream) -{ - detail::csr_transpose(handle.get_cusparse_handle(), - csr_indptr, - csr_indices, - csr_data, - csc_indptr, - csc_indices, - csc_data, - csr_nrows, - csr_ncols, - nnz, - stream); -} -}; // end NAMESPACE linalg -}; // end NAMESPACE sparse -}; // end NAMESPACE raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "transpose.cuh" diff --git a/cpp/include/raft/sparse/op/filter.hpp b/cpp/include/raft/sparse/op/filter.hpp index 3821d963b0..6a59148fd7 100644 --- a/cpp/include/raft/sparse/op/filter.hpp +++ b/cpp/include/raft/sparse/op/filter.hpp @@ -18,82 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __FILTER_H -#define __FILTER_H - -#pragma once - -#include -#include -#include - -namespace raft { -namespace sparse { -namespace op { - -/** - * @brief Removes the values matching a particular scalar from a COO formatted sparse matrix. - * - * @param rows: input array of rows (size n) - * @param cols: input array of cols (size n) - * @param vals: input array of vals (size n) - * @param nnz: size of current rows/cols/vals arrays - * @param crows: compressed array of rows - * @param ccols: compressed array of cols - * @param cvals: compressed array of vals - * @param cnnz: array of non-zero counts per row - * @param cur_cnnz array of counts per row - * @param scalar: scalar to remove from arrays - * @param n: number of rows in dense matrix - * @param stream: cuda stream to use - */ -template -void coo_remove_scalar(const int* rows, - const int* cols, - const T* vals, - int nnz, - int* crows, - int* ccols, - T* cvals, - int* cnnz, - int* cur_cnnz, - T scalar, - int n, - cudaStream_t stream) -{ - detail::coo_remove_scalar<128, T>( - rows, cols, vals, nnz, crows, ccols, cvals, cnnz, cur_cnnz, scalar, n, stream); -} - /** - * @brief Removes the values matching a particular scalar from a COO formatted sparse matrix. - * - * @param in: input COO matrix - * @param out: output COO matrix - * @param scalar: scalar to remove from arrays - * @param stream: cuda stream to use + * DISCLAIMER: this file is deprecated: use filter.cuh instead */ -template -void coo_remove_scalar(COO* in, COO* out, T scalar, cudaStream_t stream) -{ - detail::coo_remove_scalar<128, T>(in, out, scalar, stream); -} -/** - * @brief Removes zeros from a COO formatted sparse matrix. 
- * - * @param in: input COO matrix - * @param out: output COO matrix - * @param stream: cuda stream to use - */ -template -void coo_remove_zeros(COO* in, COO* out, cudaStream_t stream) -{ - coo_remove_scalar(in, out, T(0.0), stream); -} +#pragma once -}; // namespace op -}; // end NAMESPACE sparse -}; // end NAMESPACE raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "filter.cuh" diff --git a/cpp/include/raft/sparse/op/reduce.hpp b/cpp/include/raft/sparse/op/reduce.hpp index bb7560fa3d..37923e070c 100644 --- a/cpp/include/raft/sparse/op/reduce.hpp +++ b/cpp/include/raft/sparse/op/reduce.hpp @@ -18,75 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SPARSE_REDUCE_H -#define __SPARSE_REDUCE_H - -#pragma once - -#include -#include -#include - -namespace raft { -namespace sparse { -namespace op { /** - * Computes a mask from a sorted COO matrix where 0's denote - * duplicate values and 1's denote new values. This mask can - * be useful for computing an exclusive scan to pre-build offsets - * for reducing duplicates, such as when symmetrizing - * or taking the min of each duplicated value. - * - * Note that this function always marks the first value as 0 so that - * a cumulative sum can be performed as a follow-on. However, even - * if the mask is used direclty, any duplicates should always have a - * 1 when first encountered so it can be assumed that the first element - * is always a 1 otherwise. 
- * - * @tparam value_idx - * @param[out] mask output mask, size nnz - * @param[in] rows COO rows array, size nnz - * @param[in] cols COO cols array, size nnz - * @param[in] nnz number of nonzeros in input arrays - * @param[in] stream cuda ops will be ordered wrt this stream + * DISCLAIMER: this file is deprecated: use reduce.cuh instead */ -template -void compute_duplicates_mask( - value_idx* mask, const value_idx* rows, const value_idx* cols, size_t nnz, cudaStream_t stream) -{ - detail::compute_duplicates_mask(mask, rows, cols, nnz, stream); -} -/** - * Performs a COO reduce of duplicate columns per row, taking the max weight - * for duplicate columns in each row. This function assumes the input COO - * has been sorted by both row and column but makes no assumption on - * the sorting of values. - * @tparam value_idx - * @tparam value_t - * @param[in] handle - * @param[out] out output COO, the nnz will be computed allocate() will be called in this function. - * @param[in] rows COO rows array, size nnz - * @param[in] cols COO cols array, size nnz - * @param[in] vals COO vals array, size nnz - * @param[in] nnz number of nonzeros in COO input arrays - * @param[in] m number of rows in COO input matrix - * @param[in] n number of columns in COO input matrix - */ -template -void max_duplicates(const raft::handle_t& handle, - raft::sparse::COO& out, - const value_idx* rows, - const value_idx* cols, - const value_t* vals, - size_t nnz, - size_t m, - size_t n) -{ - detail::max_duplicates(handle, out, rows, cols, vals, nnz, m, n); -} -}; // END namespace op -}; // END namespace sparse -}; // END namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "reduce.cuh" diff --git a/cpp/include/raft/sparse/op/row_op.hpp b/cpp/include/raft/sparse/op/row_op.hpp index ac12432e92..8443f9f090 100644 --- a/cpp/include/raft/sparse/op/row_op.hpp +++ b/cpp/include/raft/sparse/op/row_op.hpp @@ -18,37 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SPARSE_ROW_OP_H -#define __SPARSE_ROW_OP_H - -#pragma once - -#include -#include - -namespace raft { -namespace sparse { -namespace op { - /** - * @brief Perform a custom row operation on a CSR matrix in batches. - * @tparam T numerical type of row_ind array - * @tparam TPB_X number of threads per block to use for underlying kernel - * @tparam Lambda type of custom operation function - * @param row_ind the CSR row_ind array to perform parallel operations over - * @param n_rows total number vertices in graph - * @param nnz number of non-zeros - * @param op custom row operation functor accepting the row and beginning index. - * @param stream cuda stream to use + * DISCLAIMER: this file is deprecated: use row_op.cuh instead */ -template void> -void csr_row_op(const Index_* row_ind, Index_ n_rows, Index_ nnz, Lambda op, cudaStream_t stream) -{ - detail::csr_row_op(row_ind, n_rows, nnz, op, stream); -} -}; // namespace op -}; // end NAMESPACE sparse -}; // end NAMESPACE raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "row_op.cuh" diff --git a/cpp/include/raft/sparse/op/slice.hpp b/cpp/include/raft/sparse/op/slice.hpp index 75b7e478e5..4d7e1858de 100644 --- a/cpp/include/raft/sparse/op/slice.hpp +++ b/cpp/include/raft/sparse/op/slice.hpp @@ -18,69 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __SLICE_H -#define __SLICE_H - -#pragma once - -#include -#include - -namespace raft { -namespace sparse { -namespace op { - /** - * Slice consecutive rows from a CSR array and populate newly sliced indptr array - * @tparam value_idx - * @param[in] start_row : beginning row to slice - * @param[in] stop_row : ending row to slice - * @param[in] indptr : indptr of input CSR to slice - * @param[out] indptr_out : output sliced indptr to populate - * @param[in] start_offset : beginning column offset of input indptr - * @param[in] stop_offset : ending column offset of input indptr - * @param[in] stream : cuda stream for ordering events + * DISCLAIMER: this file is deprecated: use slice.cuh instead */ -template -void csr_row_slice_indptr(value_idx start_row, - value_idx stop_row, - const value_idx* indptr, - value_idx* indptr_out, - value_idx* start_offset, - value_idx* stop_offset, - cudaStream_t stream) -{ - detail::csr_row_slice_indptr( - start_row, stop_row, indptr, indptr_out, start_offset, stop_offset, stream); -} -/** - * Slice rows from a CSR, populate column and data arrays - * @tparam value_idx : data type of CSR index arrays - * @tparam value_t : data type of CSR data array - * @param[in] start_offset : beginning column offset to slice - * @param[in] stop_offset : ending column offset to slice - * @param[in] indices : column indices array from input CSR - * @param[in] data : data array from input CSR - * @param[out] indices_out : output column indices array - * @param[out] data_out : output data array - * @param[in] stream : cuda stream for ordering events - */ -template -void csr_row_slice_populate(value_idx start_offset, - value_idx stop_offset, - const value_idx* indices, - const value_t* data, - value_idx* indices_out, - value_t* data_out, - cudaStream_t stream) -{ - detail::csr_row_slice_populate( - start_offset, stop_offset, indices, data, indices_out, data_out, stream); -} +#pragma once -}; // namespace op -}; // end NAMESPACE sparse -}; // 
end NAMESPACE raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "slice.cuh" diff --git a/cpp/include/raft/sparse/op/sort.hpp b/cpp/include/raft/sparse/op/sort.hpp index cd363582fb..867bb1bf35 100644 --- a/cpp/include/raft/sparse/op/sort.hpp +++ b/cpp/include/raft/sparse/op/sort.hpp @@ -18,66 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SPARSE_SORT_H -#define __SPARSE_SORT_H - -#pragma once - -#include -#include - -namespace raft { -namespace sparse { -namespace op { - /** - * @brief Sorts the arrays that comprise the coo matrix - * by row and then by column. - * - * @param m number of rows in coo matrix - * @param n number of cols in coo matrix - * @param nnz number of non-zeros - * @param rows rows array from coo matrix - * @param cols cols array from coo matrix - * @param vals vals array from coo matrix - * @param stream: cuda stream to use + * DISCLAIMER: this file is deprecated: use sort.cuh instead */ -template -void coo_sort(int m, int n, int nnz, int* rows, int* cols, T* vals, cudaStream_t stream) -{ - detail::coo_sort(m, n, nnz, rows, cols, vals, stream); -} -/** - * @brief Sort the underlying COO arrays by row - * @tparam T: the type name of the underlying value array - * @param in: COO to sort by row - * @param stream: the cuda stream to use - */ -template -void coo_sort(COO* const in, cudaStream_t stream) -{ - coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); -} +#pragma once -/** - * Sorts a COO by its weight - * @tparam value_idx - * @tparam value_t - * @param[inout] rows source edges - * @param[inout] cols dest edges - * @param[inout] data edge weights - * @param[in] nnz number of edges in edge list - * @param[in] stream cuda stream for which to order cuda operations - */ -template -void coo_sort_by_weight( - value_idx* rows, value_idx* cols, value_t* data, value_idx 
nnz, cudaStream_t stream) -{ - detail::coo_sort_by_weight(rows, cols, data, nnz, stream); -} -}; // namespace op -}; // end NAMESPACE sparse -}; // end NAMESPACE raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "sort.cuh" diff --git a/cpp/include/raft/sparse/selection/connect_components.hpp b/cpp/include/raft/sparse/selection/connect_components.hpp index 25d71367db..b6597babc8 100644 --- a/cpp/include/raft/sparse/selection/connect_components.hpp +++ b/cpp/include/raft/sparse/selection/connect_components.hpp @@ -18,70 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __CONNECT_COMPONENTS_H -#define __CONNECT_COMPONENTS_H - -#include -#include -#include - -namespace raft { -namespace linkage { - -template -using FixConnectivitiesRedOp = detail::FixConnectivitiesRedOp; - /** - * Gets the number of unique components from array of - * colors or labels. This does not assume the components are - * drawn from a monotonically increasing set. - * @tparam value_idx - * @param[in] colors array of components - * @param[in] n_rows size of components array - * @param[in] stream cuda stream for which to order cuda operations - * @return total number of components + * DISCLAIMER: this file is deprecated: use connect_components.cuh instead */ -template -value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream) -{ - return detail::get_n_components(colors, n_rows, stream); -} -/** - * Connects the components of an otherwise unconnected knn graph - * by computing a 1-nn to neighboring components of each data point - * (e.g. component(nn) != component(self)) and reducing the results to - * include the set of smallest destination components for each source - * component. 
The result will not necessarily contain - * n_components^2 - n_components number of elements because many components - * will likely not be contained in the neighborhoods of 1-nns. - * @tparam value_idx - * @tparam value_t - * @param[in] handle raft handle - * @param[out] out output edge list containing nearest cross-component - * edges. - * @param[in] X original (row-major) dense matrix for which knn graph should be constructed. - * @param[in] orig_colors array containing component number for each row of X - * @param[in] n_rows number of rows in X - * @param[in] n_cols number of cols in X - * @param[in] reduction_op - * @param[in] metric - */ -template -void connect_components( - const raft::handle_t& handle, - raft::sparse::COO& out, - const value_t* X, - const value_idx* orig_colors, - size_t n_rows, - size_t n_cols, - red_op reduction_op, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) -{ - detail::connect_components(handle, out, X, orig_colors, n_rows, n_cols, reduction_op, metric); -} +#pragma once -}; // end namespace linkage -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "connect_components.cuh" diff --git a/cpp/include/raft/sparse/selection/knn.hpp b/cpp/include/raft/sparse/selection/knn.hpp index bd6dd39fdf..6924e0b5a7 100644 --- a/cpp/include/raft/sparse/selection/knn.hpp +++ b/cpp/include/raft/sparse/selection/knn.hpp @@ -18,90 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __SPARSE_KNN_H -#define __SPARSE_KNN_H - -#pragma once - -#include -#include -#include - -namespace raft { -namespace sparse { -namespace selection { - /** - * Search the sparse kNN for the k-nearest neighbors of a set of sparse query vectors - * using some distance implementation - * @param[in] idxIndptr csr indptr of the index matrix (size n_idx_rows + 1) - * @param[in] idxIndices csr column indices array of the index matrix (size n_idx_nnz) - * @param[in] idxData csr data array of the index matrix (size idxNNZ) - * @param[in] idxNNZ number of non-zeros for sparse index matrix - * @param[in] n_idx_rows number of data samples in index matrix - * @param[in] n_idx_cols - * @param[in] queryIndptr csr indptr of the query matrix (size n_query_rows + 1) - * @param[in] queryIndices csr indices array of the query matrix (size queryNNZ) - * @param[in] queryData csr data array of the query matrix (size queryNNZ) - * @param[in] queryNNZ number of non-zeros for sparse query matrix - * @param[in] n_query_rows number of data samples in query matrix - * @param[in] n_query_cols number of features in query matrix - * @param[out] output_indices dense matrix for output indices (size n_query_rows * k) - * @param[out] output_dists dense matrix for output distances (size n_query_rows * k) - * @param[in] k the number of neighbors to query - * @param[in] handle CUDA handle.get_stream() to order operations with respect to - * @param[in] batch_size_index maximum number of rows to use from index matrix per batch - * @param[in] batch_size_query maximum number of rows to use from query matrix per batch - * @param[in] metric distance metric/measure to use - * @param[in] metricArg potential argument for metric (currently unused) + * DISCLAIMER: this file is deprecated: use knn.cuh instead */ -template -void brute_force_knn(const value_idx* idxIndptr, - const value_idx* idxIndices, - const value_t* idxData, - size_t idxNNZ, - int n_idx_rows, - int n_idx_cols, - const value_idx* 
queryIndptr, - const value_idx* queryIndices, - const value_t* queryData, - size_t queryNNZ, - int n_query_rows, - int n_query_cols, - value_idx* output_indices, - value_t* output_dists, - int k, - const raft::handle_t& handle, - size_t batch_size_index = 2 << 14, // approx 1M - size_t batch_size_query = 2 << 14, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded, - float metricArg = 0) -{ - detail::sparse_knn_t(idxIndptr, - idxIndices, - idxData, - idxNNZ, - n_idx_rows, - n_idx_cols, - queryIndptr, - queryIndices, - queryData, - queryNNZ, - n_query_rows, - n_query_cols, - output_indices, - output_dists, - k, - handle, - batch_size_index, - batch_size_query, - metric, - metricArg) - .run(); -} -}; // namespace selection -}; // namespace sparse -}; // namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "knn.cuh" diff --git a/cpp/include/raft/sparse/selection/knn_graph.hpp b/cpp/include/raft/sparse/selection/knn_graph.hpp index be47a6a9ef..833bdb61d2 100644 --- a/cpp/include/raft/sparse/selection/knn_graph.hpp +++ b/cpp/include/raft/sparse/selection/knn_graph.hpp @@ -18,51 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __KNN_GRAPH_H -#define __KNN_GRAPH_H - -#pragma once - -#include -#include -#include - -#include - -namespace raft { -namespace sparse { -namespace selection { - /** - * Constructs a (symmetrized) knn graph edge list from - * dense input vectors. - * - * Note: The resulting KNN graph is not guaranteed to be connected. 
- * - * @tparam value_idx - * @tparam value_t - * @param[in] handle raft handle - * @param[in] X dense matrix of input data samples and observations - * @param[in] m number of data samples (rows) in X - * @param[in] n number of observations (columns) in X - * @param[in] metric distance metric to use when constructing neighborhoods - * @param[out] out output edge list - * @param c + * DISCLAIMER: this file is deprecated: use knn_graph.cuh instead */ -template -void knn_graph(const handle_t& handle, - const value_t* X, - std::size_t m, - std::size_t n, - raft::distance::DistanceType metric, - raft::sparse::COO& out, - int c = 15) -{ - detail::knn_graph(handle, X, m, n, metric, out, c); -} -}; // namespace selection -}; // namespace sparse -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "knn_graph.cuh" diff --git a/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh b/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh index fca5d05465..123f6cf70f 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_kmeans_balanced.cuh @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index d4bce1fdf4..9ff19c2747 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/spatial/knn/epsilon_neighborhood.hpp b/cpp/include/raft/spatial/knn/epsilon_neighborhood.hpp index 7674ac0d46..1f1a3d8f8e 100644 --- a/cpp/include/raft/spatial/knn/epsilon_neighborhood.hpp +++ b/cpp/include/raft/spatial/knn/epsilon_neighborhood.hpp @@ 
-18,51 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __EPSILON_NEIGH_H -#define __EPSILON_NEIGH_H +/** + * DISCLAIMER: this file is deprecated: use epsilon_neighborhood.cuh instead + */ #pragma once -#include - -namespace raft { -namespace spatial { -namespace knn { - -/** - * @brief Computes epsilon neighborhood for the L2-Squared distance metric - * - * @tparam DataT IO and math type - * @tparam IdxT Index type - * - * @param[out] adj adjacency matrix [row-major] [on device] [dim = m x n] - * @param[out] vd vertex degree array [on device] [len = m + 1] - * `vd + m` stores the total number of edges in the adjacency - * matrix. Pass a nullptr if you don't need this info. - * @param[in] x first matrix [row-major] [on device] [dim = m x k] - * @param[in] y second matrix [row-major] [on device] [dim = n x k] - * @param[in] m number of rows in x - * @param[in] n number of rows in y - * @param[in] k number of columns in x and k - * @param[in] eps defines epsilon neighborhood radius (should be passed as - * squared as we compute L2-squared distance in this method) - * @param[in] stream cuda stream - */ -template -void epsUnexpL2SqNeighborhood(bool* adj, - IdxT* vd, - const DataT* x, - const DataT* y, - IdxT m, - IdxT n, - IdxT k, - DataT eps, - cudaStream_t stream) -{ - detail::epsUnexpL2SqNeighborhood(adj, vd, x, y, m, n, k, eps, stream); -} -} // namespace knn -} // namespace spatial -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "epsilon_neighborhood.cuh" diff --git a/cpp/include/raft/spatial/knn/specializations.hpp b/cpp/include/raft/spatial/knn/specializations.hpp index 13721a975f..04afb73036 100644 --- a/cpp/include/raft/spatial/knn/specializations.hpp +++ b/cpp/include/raft/spatial/knn/specializations.hpp @@ -18,13 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __KNN_SPECIALIZATIONS_H -#define __KNN_SPECIALIZATIONS_H +/** + * DISCLAIMER: this file is deprecated: use specializations.cuh instead + */ #pragma once -#include -#include -#include +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "specializations.cuh" diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp index e6b37f29ec..57553daedf 100644 --- a/cpp/include/raft/spectral/eigen_solvers.hpp +++ b/cpp/include/raft/spectral/eigen_solvers.hpp @@ -18,95 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __EIGEN_SOLVERS_H -#define __EIGEN_SOLVERS_H +/** + * DISCLAIMER: this file is deprecated: use eigen_solvers.cuh instead + */ #pragma once -#include -#include - -namespace raft { -namespace spectral { - -// aggregate of control params for Eigen Solver: -// -template -struct eigen_solver_config_t { - size_type_t n_eigVecs; - size_type_t maxIter; - - size_type_t restartIter; - value_type_t tol; - - bool reorthogonalize{false}; - unsigned long long seed{ - 1234567}; // CAVEAT: this default value is now common to all instances of using seed in - // Lanczos; was not the case before: there were places where a default seed = 123456 - // was used; this may trigger slightly different # solver iterations -}; - -template -struct lanczos_solver_t { - explicit lanczos_solver_t( - eigen_solver_config_t const& config) - : config_(config) - { - } - - index_type_t solve_smallest_eigenvectors( - handle_t const& handle, - matrix::sparse_matrix_t const& A, - value_type_t* __restrict__ eigVals, - value_type_t* __restrict__ eigVecs) const - { - RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); - RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); - index_type_t iters{}; - linalg::computeSmallestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, 
- config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); - return iters; - } - - index_type_t solve_largest_eigenvectors( - handle_t const& handle, - matrix::sparse_matrix_t const& A, - value_type_t* __restrict__ eigVals, - value_type_t* __restrict__ eigVecs) const - { - RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); - RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); - index_type_t iters{}; - linalg::computeLargestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, - config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); - return iters; - } - - auto const& get_config(void) const { return config_; } - - private: - eigen_solver_config_t config_; -}; - -} // namespace spectral -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif +#include "eigen_solvers.cuh" diff --git a/cpp/include/raft/stats/accuracy.hpp b/cpp/include/raft/stats/accuracy.hpp index 8cbb0f719e..a1b7321879 100644 --- a/cpp/include/raft/stats/accuracy.hpp +++ b/cpp/include/raft/stats/accuracy.hpp @@ -18,32 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __STATS_ACCURACY_H -#define __STATS_ACCURACY_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief Compute accuracy of predictions. Useful for classification. - * @tparam math_t: data type for predictions (e.g., int for classification) - * @param[in] predictions: array of predictions (GPU pointer). - * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). - * @param[in] n: number of elements in each of predictions, ref_predictions. - * @param[in] stream: cuda stream. - * @return: Accuracy score in [0, 1]; higher is better. 
+ * DISCLAIMER: this file is deprecated: use accuracy.cuh instead */ -template -float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, cudaStream_t stream) -{ - return detail::accuracy_score(predictions, ref_predictions, n, stream); -} -} // namespace stats -} // namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "accuracy.cuh" diff --git a/cpp/include/raft/stats/adjusted_rand_index.hpp b/cpp/include/raft/stats/adjusted_rand_index.hpp index bc836eed86..3a990ac985 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.hpp +++ b/cpp/include/raft/stats/adjusted_rand_index.hpp @@ -18,43 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __ADJUSTED_RAND_INDEX_H -#define __ADJUSTED_RAND_INDEX_H - /** - * @file adjusted_rand_index.hpp - * @brief The adjusted Rand index is the corrected-for-chance version of the Rand index. - * Such a correction for chance establishes a baseline by using the expected similarity - * of all pair-wise comparisons between clusterings specified by a random model. 
+ * DISCLAIMER: this file is deprecated: use adjusted_rand_index.cuh instead */ #pragma once -#include - -namespace raft { -namespace stats { - -/** - * @brief Function to calculate Adjusted RandIndex as described - * here - * @tparam T data-type for input label arrays - * @tparam MathT integral data-type used for computing n-choose-r - * @param firstClusterArray: the array of classes - * @param secondClusterArray: the array of classes - * @param size: the size of the data points of type int - * @param stream: the cudaStream object - */ -template -double adjusted_rand_index(const T* firstClusterArray, - const T* secondClusterArray, - int size, - cudaStream_t stream) -{ - return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream); -} - -}; // end namespace stats -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif +#include "adjusted_rand_index.cuh" diff --git a/cpp/include/raft/stats/contingency_matrix.hpp b/cpp/include/raft/stats/contingency_matrix.hpp index 70800be1e6..141f678f94 100644 --- a/cpp/include/raft/stats/contingency_matrix.hpp +++ b/cpp/include/raft/stats/contingency_matrix.hpp @@ -18,93 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __CONTINGENCY_MATRIX_H -#define __CONTINGENCY_MATRIX_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief use this to allocate output matrix size - * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) - * @param groundTruth: device 1-d array for ground truth (num of rows) - * @param nSamples: number of elements in input array - * @param stream: cuda stream for execution - * @param minLabel: [out] calculated min value in input array - * @param maxLabel: [out] calculated max value in input array + * DISCLAIMER: this file is deprecated: use contingency_matrix.cuh instead */ -template -void getInputClassCardinality( - const T* groundTruth, const int nSamples, cudaStream_t stream, T& minLabel, T& maxLabel) -{ - detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); -} -/** - * @brief Calculate workspace size for running contingency matrix calculations - * @tparam T label type - * @tparam OutT output matrix type - * @param nSamples: number of elements in input array - * @param groundTruth: device 1-d array for ground truth (num of rows) - * @param stream: cuda stream for execution - * @param minLabel: Optional, min value in input array - * @param maxLabel: Optional, max value in input array - */ -template -size_t getContingencyMatrixWorkspaceSize(int nSamples, - const T* groundTruth, - cudaStream_t stream, - T minLabel = std::numeric_limits::max(), - T maxLabel = std::numeric_limits::max()) -{ - return detail::getContingencyMatrixWorkspaceSize( - nSamples, groundTruth, stream, minLabel, maxLabel); -} - -/** - * @brief contruct contingency matrix given input ground truth and prediction - * labels. Users should call function getInputClassCardinality to find - * and allocate memory for output. 
Similarly workspace requirements - * should be checked using function getContingencyMatrixWorkspaceSize - * @tparam T label type - * @tparam OutT output matrix type - * @param groundTruth: device 1-d array for ground truth (num of rows) - * @param predictedLabel: device 1-d array for prediction (num of columns) - * @param nSamples: number of elements in input array - * @param outMat: output buffer for contingecy matrix - * @param stream: cuda stream for execution - * @param workspace: Optional, workspace memory allocation - * @param workspaceSize: Optional, size of workspace memory - * @param minLabel: Optional, min value in input ground truth array - * @param maxLabel: Optional, max value in input ground truth array - */ -template -void contingencyMatrix(const T* groundTruth, - const T* predictedLabel, - int nSamples, - OutT* outMat, - cudaStream_t stream, - void* workspace = nullptr, - size_t workspaceSize = 0, - T minLabel = std::numeric_limits::max(), - T maxLabel = std::numeric_limits::max()) -{ - detail::contingencyMatrix(groundTruth, - predictedLabel, - nSamples, - outMat, - stream, - workspace, - workspaceSize, - minLabel, - maxLabel); -} +#pragma once -}; // namespace stats -}; // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "contingency_matrix.cuh" diff --git a/cpp/include/raft/stats/cov.hpp b/cpp/include/raft/stats/cov.hpp index a584dedc95..a6c653206a 100644 --- a/cpp/include/raft/stats/cov.hpp +++ b/cpp/include/raft/stats/cov.hpp @@ -18,50 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __COV_H -#define __COV_H +/** + * DISCLAIMER: this file is deprecated: use cov.cuh instead + */ #pragma once -#include -namespace raft { -namespace stats { -/** - * @brief Compute covariance of the input matrix - * - * Mean operation is assumed to be performed on a given column. 
- * - * @tparam Type the data type - * @param covar the output covariance matrix - * @param data the input matrix (this will get mean-centered at the end!) - * @param mu mean vector of the input matrix - * @param D number of columns of data - * @param N number of rows of data - * @param sample whether to evaluate sample covariance or not. In other words, - * whether to normalize the output using N-1 or N, for true or false, - * respectively - * @param rowMajor whether the input data is row or col major - * @param stable whether to run the slower-but-numerically-stable version or not - * @param handle cublas handle - * @param stream cuda stream - * @note if stable=true, then the input data will be mean centered after this - * function returns! - */ -template -void cov(const raft::handle_t& handle, - Type* covar, - Type* data, - const Type* mu, - std::size_t D, - std::size_t N, - bool sample, - bool rowMajor, - bool stable, - cudaStream_t stream) -{ - detail::cov(handle, covar, data, mu, D, N, sample, rowMajor, stable, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "cov.cuh" diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh index 6d6f901fab..9c17d2ed0f 100644 --- a/cpp/include/raft/stats/detail/weighted_mean.cuh +++ b/cpp/include/raft/stats/detail/weighted_mean.cuh @@ -17,8 +17,8 @@ #pragma once #include -#include -#include +#include +#include namespace raft { namespace stats { diff --git a/cpp/include/raft/stats/dispersion.hpp b/cpp/include/raft/stats/dispersion.hpp index 7fabf07992..820c9e27ea 100644 --- a/cpp/include/raft/stats/dispersion.hpp +++ b/cpp/include/raft/stats/dispersion.hpp @@ -18,48 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __DISPERSION_H -#define __DISPERSION_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief Compute cluster dispersion metric. This is very useful for - * automatically finding the 'k' (in kmeans) that improves this metric. - * @tparam DataT data type - * @tparam IdxT index type - * @tparam TPB threads block for kernels launched - * @param centroids the cluster centroids. This is assumed to be row-major - * and of dimension (nClusters x dim) - * @param clusterSizes number of points in the dataset which belong to each - * cluster. This is of length nClusters - * @param globalCentroid compute the global weighted centroid of all cluster - * centroids. This is of length dim. Pass a nullptr if this is not needed - * @param nClusters number of clusters - * @param nPoints number of points in the dataset - * @param dim dataset dimensionality - * @param stream cuda stream - * @return the cluster dispersion value + * DISCLAIMER: this file is deprecated: use dispersion.cuh instead */ -template -DataT dispersion(const DataT* centroids, - const IdxT* clusterSizes, - DataT* globalCentroid, - IdxT nClusters, - IdxT nPoints, - IdxT dim, - cudaStream_t stream) -{ - return detail::dispersion( - centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream); -} -} // end namespace stats -} // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "dispersion.cuh" diff --git a/cpp/include/raft/stats/entropy.hpp b/cpp/include/raft/stats/entropy.hpp index 37dc2b700c..d8e1c11125 100644 --- a/cpp/include/raft/stats/entropy.hpp +++ b/cpp/include/raft/stats/entropy.hpp @@ -18,37 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __ENTROPY_H -#define __ENTROPY_H - -#pragma once -#include - -namespace raft { -namespace stats { - /** - * @brief Function to calculate entropy - * more info on entropy - * - * @param clusterArray: the array of classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object - * @return the entropy score + * DISCLAIMER: this file is deprecated: use entropy.cuh instead */ -template -double entropy(const T* clusterArray, - const int size, - const T lowerLabelRange, - const T upperLabelRange, - cudaStream_t stream) -{ - return detail::entropy(clusterArray, size, lowerLabelRange, upperLabelRange, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "entropy.cuh" diff --git a/cpp/include/raft/stats/histogram.hpp b/cpp/include/raft/stats/histogram.hpp index 627026c219..c123375597 100644 --- a/cpp/include/raft/stats/histogram.hpp +++ b/cpp/include/raft/stats/histogram.hpp @@ -18,54 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __HISTOGRAM_H -#define __HISTOGRAM_H - -#pragma once - -#include -#include - -// This file is a shameless amalgamation of independent works done by -// Lars Nyland and Andy Adinets - -///@todo: add cub's histogram as another option - -namespace raft { -namespace stats { - /** - * @brief Perform histogram on the input data. It chooses the right load size - * based on the input data vector length. It also supports large-bin cases - * using a specialized smem-based hashing technique. 
- * @tparam DataT input data type - * @tparam IdxT data type used to compute indices - * @tparam BinnerOp takes the input data and computes its bin index - * @param type histogram implementation type to choose - * @param bins the output bins (length = ncols * nbins) - * @param nbins number of bins - * @param data input data (length = ncols * nrows) - * @param nrows data array length in each column (or batch) - * @param ncols number of columsn (or batch size) - * @param stream cuda stream - * @param binner the operation that computes the bin index of the input data - * - * @note signature of BinnerOp is `int func(DataT, IdxT);` + * DISCLAIMER: this file is deprecated: use histogram.cuh instead */ -template > -void histogram(HistType type, - int* bins, - IdxT nbins, - const DataT* data, - IdxT nrows, - IdxT ncols, - cudaStream_t stream, - BinnerOp binner = IdentityBinner()) -{ - detail::histogram(type, bins, nbins, data, nrows, ncols, stream, binner); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "histogram.cuh" diff --git a/cpp/include/raft/stats/homogeneity_score.hpp b/cpp/include/raft/stats/homogeneity_score.hpp index 4e119f2bc7..8d2433d1da 100644 --- a/cpp/include/raft/stats/homogeneity_score.hpp +++ b/cpp/include/raft/stats/homogeneity_score.hpp @@ -13,46 +13,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - /** * This file is deprecated and will be removed in release 22.06. * Please use the cuh version instead. 
*/ -#ifndef __HOMOGENEITY_SCORE_H -#define __HOMOGENEITY_SCORE_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief Function to calculate the homogeneity score between two clusters - * more info on mutual - * information - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object + * DISCLAIMER: this file is deprecated: use homogeneity_score.cuh instead */ -template -double homogeneity_score(const T* truthClusterArray, - const T* predClusterArray, - int size, - T lowerLabelRange, - T upperLabelRange, - cudaStream_t stream) -{ - return detail::homogeneity_score( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "homogeneity_score.cuh" diff --git a/cpp/include/raft/stats/information_criterion.hpp b/cpp/include/raft/stats/information_criterion.hpp index 3a39e56c41..898ffbfa8e 100644 --- a/cpp/include/raft/stats/information_criterion.hpp +++ b/cpp/include/raft/stats/information_criterion.hpp @@ -18,56 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __INFORMATION_CRIT_H -#define __INFORMATION_CRIT_H - /** - * @file information_criterion.hpp - * @brief These information criteria are used to evaluate the quality of models - * by balancing the quality of the fit and the number of parameters. 
- * - * See: - * - AIC: https://en.wikipedia.org/wiki/Akaike_information_criterion - * - AICc: https://en.wikipedia.org/wiki/Akaike_information_criterion#AICc - * - BIC: https://en.wikipedia.org/wiki/Bayesian_information_criterion + * DISCLAIMER: this file is deprecated: use information_criterion.cuh instead */ -#pragma once - -#include -#include - -namespace raft { -namespace stats { -/** - * Compute the given type of information criterion - * - * @note: it is safe to do the computation in-place (i.e give same pointer - * as input and output) - * - * @param[out] d_ic Information criterion to be returned for each - * series (device) - * @param[in] d_loglikelihood Log-likelihood for each series (device) - * @param[in] ic_type Type of criterion to compute. See IC_Type - * @param[in] n_params Number of parameters in the model - * @param[in] batch_size Number of series in the batch - * @param[in] n_samples Number of samples in each series - * @param[in] stream CUDA stream - */ -template -void information_criterion_batched(ScalarT* d_ic, - const ScalarT* d_loglikelihood, - IC_Type ic_type, - IdxT n_params, - IdxT batch_size, - IdxT n_samples, - cudaStream_t stream) -{ - batched::detail::information_criterion( - d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_samples, stream); -} +#pragma once -} // namespace stats -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "information_criterion.cuh" diff --git a/cpp/include/raft/stats/kl_divergence.hpp b/cpp/include/raft/stats/kl_divergence.hpp index 59db77246f..086d5f1d23 100644 --- a/cpp/include/raft/stats/kl_divergence.hpp +++ b/cpp/include/raft/stats/kl_divergence.hpp @@ -18,34 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __KL_DIVERGENCE_H -#define __KL_DIVERGENCE_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief Function to calculate KL Divergence - * more info on KL - * Divergence - * - * @tparam DataT: Data type of the input array - * @param modelPDF: the model array of probability density functions of type DataT - * @param candidatePDF: the candidate array of probability density functions of type DataT - * @param size: the size of the data points of type int - * @param stream: the cudaStream object + * DISCLAIMER: this file is deprecated: use kl_divergence.cuh instead */ -template -DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, cudaStream_t stream) -{ - return detail::kl_divergence(modelPDF, candidatePDF, size, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "kl_divergence.cuh" diff --git a/cpp/include/raft/stats/mean.hpp b/cpp/include/raft/stats/mean.hpp index 2767b632e6..bce899d9d4 100644 --- a/cpp/include/raft/stats/mean.hpp +++ b/cpp/include/raft/stats/mean.hpp @@ -18,43 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __MEAN_H -#define __MEAN_H - -#pragma once - -#include "detail/mean.cuh" - -#include - -namespace raft { -namespace stats { - /** - * @brief Compute mean of the input matrix - * - * Mean operation is assumed to be performed on a given column. - * - * @tparam Type: the data type - * @tparam IdxType Integer type used to for addressing - * @param mu: the output mean vector - * @param data: the input matrix - * @param D: number of columns of data - * @param N: number of rows of data - * @param sample: whether to evaluate sample mean or not. 
In other words, - * whether - * to normalize the output using N-1 or N, for true or false, respectively - * @param rowMajor: whether the input data is row or col major - * @param stream: cuda stream + * DISCLAIMER: this file is deprecated: use mean.cuh instead */ -template -void mean( - Type* mu, const Type* data, IdxType D, IdxType N, bool sample, bool rowMajor, cudaStream_t stream) -{ - detail::mean(mu, data, D, N, sample, rowMajor, stream); -} -}; // namespace stats -}; // namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "mean.cuh" diff --git a/cpp/include/raft/stats/mean_center.hpp b/cpp/include/raft/stats/mean_center.hpp index e219891cab..73e49e7307 100644 --- a/cpp/include/raft/stats/mean_center.hpp +++ b/cpp/include/raft/stats/mean_center.hpp @@ -18,71 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __MEAN_CENTER_H -#define __MEAN_CENTER_H - -#pragma once - -#include "detail/mean_center.cuh" - -namespace raft { -namespace stats { - /** - * @brief Center the input matrix wrt its mean - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads per block of the cuda kernel launched - * @param out the output mean-centered matrix - * @param data input matrix - * @param mu the mean vector - * @param D number of columns of data - * @param N number of rows of data - * @param rowMajor whether input is row or col major - * @param bcastAlongRows whether to broadcast vector along rows or columns - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use mean_center.cuh instead */ -template -void meanCenter(Type* out, - const Type* data, - const Type* mu, - IdxType D, - IdxType N, - bool rowMajor, - bool bcastAlongRows, - cudaStream_t stream) -{ - detail::meanCenter(out, data, mu, D, N, rowMajor, bcastAlongRows, 
stream); -} -/** - * @brief Add the input matrix wrt its mean - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @tparam TPB threads per block of the cuda kernel launched - * @param out the output mean-added matrix - * @param data input matrix - * @param mu the mean vector - * @param D number of columns of data - * @param N number of rows of data - * @param rowMajor whether input is row or col major - * @param bcastAlongRows whether to broadcast vector along rows or columns - * @param stream cuda stream where to launch work - */ -template -void meanAdd(Type* out, - const Type* data, - const Type* mu, - IdxType D, - IdxType N, - bool rowMajor, - bool bcastAlongRows, - cudaStream_t stream) -{ - detail::meanAdd(out, data, mu, D, N, rowMajor, bcastAlongRows, stream); -} +#pragma once -}; // end namespace stats -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "mean_center.cuh" diff --git a/cpp/include/raft/stats/meanvar.hpp b/cpp/include/raft/stats/meanvar.hpp index d7ef935fbc..db67a68579 100644 --- a/cpp/include/raft/stats/meanvar.hpp +++ b/cpp/include/raft/stats/meanvar.hpp @@ -18,48 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __MEANVAR_H -#define __MEANVAR_H - -#pragma once - -#include "detail/meanvar.cuh" - -namespace raft::stats { - /** - * @brief Compute mean and variance for each column of a given matrix. - * - * The operation is performed in a single sweep. Consider using it when you need to compute - * both mean and variance, or when you need to compute variance but don't have the mean. - * It's almost twice faster than running `mean` and `vars` sequentially, because all three - * kernels are memory-bound. 
- * - * @tparam Type the data type - * @tparam IdxType Integer type used for addressing - * @param [out] mean the output mean vector of size D - * @param [out] var the output variance vector of size D - * @param [in] data the input matrix of size [N, D] - * @param [in] D number of columns of data - * @param [in] N number of rows of data - * @param [in] sample whether to evaluate sample variance or not. In other words, whether to - * normalize the variance using N-1 or N, for true or false respectively. - * @param [in] rowMajor whether the input data is row- or col-major, for true or false respectively. - * @param [in] stream + * DISCLAIMER: this file is deprecated: use meanvar.cuh instead */ -template -void meanvar(Type* mean, - Type* var, - const Type* data, - IdxType D, - IdxType N, - bool sample, - bool rowMajor, - cudaStream_t stream) -{ - detail::meanvar(mean, var, data, D, N, sample, rowMajor, stream); -} -}; // namespace raft::stats +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "meanvar.cuh" diff --git a/cpp/include/raft/stats/minmax.hpp b/cpp/include/raft/stats/minmax.hpp index 97f06129fa..ad588a38d4 100644 --- a/cpp/include/raft/stats/minmax.hpp +++ b/cpp/include/raft/stats/minmax.hpp @@ -18,62 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __MINMAX_H -#define __MINMAX_H - -#pragma once - -#include -#include -#include - -#include - -namespace raft { -namespace stats { - /** - * @brief Computes min/max across every column of the input matrix, as well as - * optionally allow to subsample based on the given row/col ID mapping vectors - * - * @tparam T the data type - * @tparam TPB number of threads per block - * @param data input data - * @param rowids actual row ID mappings. It is of length nrows. 
If you want to - * skip this index lookup entirely, pass nullptr - * @param colids actual col ID mappings. It is of length ncols. If you want to - * skip this index lookup entirely, pass nullptr - * @param nrows number of rows of data to be worked upon. The actual rows of the - * input "data" can be bigger than this! - * @param ncols number of cols of data to be worked upon. The actual cols of the - * input "data" can be bigger than this! - * @param row_stride stride (in number of elements) between 2 adjacent columns - * @param globalmin final col-wise global minimum (size = ncols) - * @param globalmax final col-wise global maximum (size = ncols) - * @param sampledcols output sampled data. Pass nullptr if you don't need this - * @param stream cuda stream - * @note This method makes the following assumptions: - * 1. input and output matrices are assumed to be col-major - * 2. ncols is small enough to fit the whole of min/max values across all cols - * in shared memory + * DISCLAIMER: this file is deprecated: use minmax.cuh instead */ -template -void minmax(const T* data, - const unsigned* rowids, - const unsigned* colids, - int nrows, - int ncols, - int row_stride, - T* globalmin, - T* globalmax, - T* sampledcols, - cudaStream_t stream) -{ - detail::minmax( - data, rowids, colids, nrows, ncols, row_stride, globalmin, globalmax, sampledcols, stream); -} -}; // namespace stats -}; // namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "minmax.cuh" diff --git a/cpp/include/raft/stats/mutual_info_score.hpp b/cpp/include/raft/stats/mutual_info_score.hpp index a080211c36..c3446e3963 100644 --- a/cpp/include/raft/stats/mutual_info_score.hpp +++ b/cpp/include/raft/stats/mutual_info_score.hpp @@ -18,39 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __MUTUAL_INFO_SCORE_H -#define __MUTUAL_INFO_SCORE_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief Function to calculate the mutual information between two clusters - * more info on mutual information - * @param firstClusterArray: the array of classes of type T - * @param secondClusterArray: the array of classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object + * DISCLAIMER: this file is deprecated: use mutual_info_score.cuh instead */ -template -double mutual_info_score(const T* firstClusterArray, - const T* secondClusterArray, - int size, - T lowerLabelRange, - T upperLabelRange, - cudaStream_t stream) -{ - return detail::mutual_info_score( - firstClusterArray, secondClusterArray, size, lowerLabelRange, upperLabelRange, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "mutual_info_score.cuh" diff --git a/cpp/include/raft/stats/r2_score.hpp b/cpp/include/raft/stats/r2_score.hpp index c88a1822ec..bc55a6596d 100644 --- a/cpp/include/raft/stats/r2_score.hpp +++ b/cpp/include/raft/stats/r2_score.hpp @@ -18,38 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __R2_SCORE_H -#define __R2_SCORE_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * Calculates the "Coefficient of Determination" (R-Squared) score - * normalizing the sum of squared errors by the total sum of squares. - * - * This score indicates the proportionate amount of variation in an - * expected response variable is explained by the independent variables - * in a linear regression model. 
The larger the R-squared value, the - * more variability is explained by the linear regression model. - * - * @param y: Array of ground-truth response variables - * @param y_hat: Array of predicted response variables - * @param n: Number of elements in y and y_hat - * @param stream: cuda stream - * @return: The R-squared value. + * DISCLAIMER: this file is deprecated: use r2_score.cuh instead */ -template -math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) -{ - return detail::r2_score(y, y_hat, n, stream); -} -} // namespace stats -} // namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "r2_score.cuh" diff --git a/cpp/include/raft/stats/rand_index.hpp b/cpp/include/raft/stats/rand_index.hpp index e8c3089371..7d398dddb4 100644 --- a/cpp/include/raft/stats/rand_index.hpp +++ b/cpp/include/raft/stats/rand_index.hpp @@ -18,31 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __RAND_INDEX_H -#define __RAND_INDEX_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief Function to calculate RandIndex - * more info on rand index - * @param firstClusterArray: the array of classes of type T - * @param secondClusterArray: the array of classes of type T - * @param size: the size of the data points of type uint64_t - * @param stream: the cudaStream object + * DISCLAIMER: this file is deprecated: use rand_index.cuh instead */ -template -double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cudaStream_t stream) -{ - return detail::compute_rand_index(firstClusterArray, secondClusterArray, size, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "rand_index.cuh" diff --git a/cpp/include/raft/stats/regression_metrics.hpp b/cpp/include/raft/stats/regression_metrics.hpp index f65ad524ef..084f4f8fbc 100644 --- a/cpp/include/raft/stats/regression_metrics.hpp +++ b/cpp/include/raft/stats/regression_metrics.hpp @@ -18,43 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __REGRESSION_METRICS_H -#define __REGRESSION_METRICS_H +/** + * DISCLAIMER: this file is deprecated: use regression_metrics.cuh instead + */ #pragma once -#include - -namespace raft { -namespace stats { - -/** - * @brief Compute regression metrics mean absolute error, mean squared error, median absolute error - * @tparam T: data type for predictions (e.g., float or double for regression). - * @param[in] predictions: array of predictions (GPU pointer). - * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). - * @param[in] n: number of elements in each of predictions, ref_predictions. Should be > 0. - * @param[in] stream: cuda stream. - * @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - - * ref_predictions[i]|) / n. - * @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - - * ref_predictions[i])^2) / n. - * @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] - - * ref_predictions[i]| for i in [0, n). - */ -template -void regression_metrics(const T* predictions, - const T* ref_predictions, - int n, - cudaStream_t stream, - double& mean_abs_error, - double& mean_squared_error, - double& median_abs_error) -{ - detail::regression_metrics( - predictions, ref_predictions, n, stream, mean_abs_error, mean_squared_error, median_abs_error); -} -} // namespace stats -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "regression_metrics.cuh" diff --git a/cpp/include/raft/stats/silhouette_score.hpp b/cpp/include/raft/stats/silhouette_score.hpp index e6c84855c6..54981edbb6 100644 --- a/cpp/include/raft/stats/silhouette_score.hpp +++ b/cpp/include/raft/stats/silhouette_score.hpp @@ -18,67 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SILHOUETTE_SCORE_H -#define __SILHOUETTE_SCORE_H - -#pragma once - -#include -#include - -namespace raft { -namespace stats { - /** - * @brief main function that returns the average silhouette score for a given set of data and its - * clusterings - * @tparam DataT: type of the data samples - * @tparam LabelT: type of the labels - * @param handle: raft handle for managing expensive resources - * @param X_in: pointer to the input Data samples array (nRows x nCols) - * @param nRows: number of data samples - * @param nCols: number of features - * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) - * @param nLabels: number of Labels - * @param silhouette_scorePerSample: pointer to the array that is optionally taken in as input and - * is populated with the silhouette score for every sample (1 x nRows) - * @param stream: the cuda stream where to launch this kernel - * @param metric: the numerical value that maps to the type of distance metric to be used in the - * calculations + * DISCLAIMER: this file is deprecated: use silhouette_score.cuh instead */ -template -DataT silhouette_score( - const raft::handle_t& handle, - DataT* X_in, - int nRows, - int nCols, - LabelT* labels, - int nLabels, - DataT* silhouette_scorePerSample, - cudaStream_t stream, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) -{ - return detail::silhouette_score( - handle, X_in, nRows, nCols, labels, nLabels, silhouette_scorePerSample, stream, metric); -} -template -value_t silhouette_score_batched( - const 
raft::handle_t& handle, - value_t* X, - value_idx n_rows, - value_idx n_cols, - label_idx* y, - label_idx n_labels, - value_t* scores, - value_idx chunk, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) -{ - return batched::detail::silhouette_score( - handle, X, n_rows, n_cols, y, n_labels, scores, chunk, metric); -} +#pragma once -}; // namespace stats -}; // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "silhouette_score.cuh" diff --git a/cpp/include/raft/stats/specializations.hpp b/cpp/include/raft/stats/specializations.hpp index 3929b3124c..0ae82f27e7 100644 --- a/cpp/include/raft/stats/specializations.hpp +++ b/cpp/include/raft/stats/specializations.hpp @@ -18,12 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __STATS_SPECIALIZATIONS_H -#define __STATS_SPECIALIZATIONS_H +/** + * DISCLAIMER: this file is deprecated: use specializations.cuh instead + */ #pragma once -#include -#include +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "specializations.cuh" diff --git a/cpp/include/raft/stats/stddev.hpp b/cpp/include/raft/stats/stddev.hpp index f496b1fd30..2222a2706d 100644 --- a/cpp/include/raft/stats/stddev.hpp +++ b/cpp/include/raft/stats/stddev.hpp @@ -18,81 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __STDDEV_H -#define __STDDEV_H - -#pragma once - -#include "detail/stddev.cuh" - -#include - -namespace raft { -namespace stats { - /** - * @brief Compute stddev of the input matrix - * - * Stddev operation is assumed to be performed on a given column. 
- * - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @param std the output stddev vector - * @param data the input matrix - * @param mu the mean vector - * @param D number of columns of data - * @param N number of rows of data - * @param sample whether to evaluate sample stddev or not. In other words, - * whether - * to normalize the output using N-1 or N, for true or false, respectively - * @param rowMajor whether the input data is row or col major - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use stddev.cuh instead */ -template -void stddev(Type* std, - const Type* data, - const Type* mu, - IdxType D, - IdxType N, - bool sample, - bool rowMajor, - cudaStream_t stream) -{ - detail::stddev(std, data, mu, D, N, sample, rowMajor, stream); -} -/** - * @brief Compute variance of the input matrix - * - * Variance operation is assumed to be performed on a given column. - * - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @param var the output stddev vector - * @param data the input matrix - * @param mu the mean vector - * @param D number of columns of data - * @param N number of rows of data - * @param sample whether to evaluate sample stddev or not. In other words, - * whether - * to normalize the output using N-1 or N, for true or false, respectively - * @param rowMajor whether the input data is row or col major - * @param stream cuda stream where to launch work - */ -template -void vars(Type* var, - const Type* data, - const Type* mu, - IdxType D, - IdxType N, - bool sample, - bool rowMajor, - cudaStream_t stream) -{ - detail::vars(var, data, mu, D, N, sample, rowMajor, stream); -} +#pragma once -}; // namespace stats -}; // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." 
\ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "stddev.cuh" diff --git a/cpp/include/raft/stats/sum.hpp b/cpp/include/raft/stats/sum.hpp index e1c8c67777..0b11a6219e 100644 --- a/cpp/include/raft/stats/sum.hpp +++ b/cpp/include/raft/stats/sum.hpp @@ -18,39 +18,14 @@ * Please use the cuh version instead. */ -#ifndef __SUM_H -#define __SUM_H - -#pragma once - -#include "detail/sum.cuh" - -#include - -namespace raft { -namespace stats { - /** - * @brief Compute sum of the input matrix - * - * Sum operation is assumed to be performed on a given column. - * - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @param output the output mean vector - * @param input the input matrix - * @param D number of columns of data - * @param N number of rows of data - * @param rowMajor whether the input data is row or col major - * @param stream cuda stream where to launch work + * DISCLAIMER: this file is deprecated: use sum.cuh instead */ -template -void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, cudaStream_t stream) -{ - detail::sum(output, input, D, N, rowMajor, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "sum.cuh" diff --git a/cpp/include/raft/stats/trustworthiness_score.hpp b/cpp/include/raft/stats/trustworthiness_score.hpp index 81edf2ea04..0053860a92 100644 --- a/cpp/include/raft/stats/trustworthiness_score.hpp +++ b/cpp/include/raft/stats/trustworthiness_score.hpp @@ -18,41 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __TRUSTWORTHINESS_SCORE_H -#define __TRUSTWORTHINESS_SCORE_H +/** + * DISCLAIMER: this file is deprecated: use trustworthiness_score.cuh instead + */ #pragma once -#include - -namespace raft { -namespace stats { -/** - * @brief Compute the trustworthiness score - * @param[in] h: raft handle - * @param[in] X: Data in original dimension - * @param[in] X_embedded: Data in target dimension (embedding) - * @param[in] n: Number of samples - * @param[in] m: Number of features in high/original dimension - * @param[in] d: Number of features in low/embedded dimension - * @param[in] n_neighbors Number of neighbors considered by trustworthiness score - * @param[in] batchSize Batch size - * @return[out] Trustworthiness score - */ -template -double trustworthiness_score(const raft::handle_t& h, - const math_t* X, - math_t* X_embedded, - int n, - int m, - int d, - int n_neighbors, - int batchSize = 512) -{ - return detail::trustworthiness_score( - h, X, X_embedded, n, m, d, n_neighbors, batchSize); -} -} // namespace stats -} // namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "trustworthiness_score.cuh" diff --git a/cpp/include/raft/stats/v_measure.hpp b/cpp/include/raft/stats/v_measure.hpp index a137af844d..0179d2c856 100644 --- a/cpp/include/raft/stats/v_measure.hpp +++ b/cpp/include/raft/stats/v_measure.hpp @@ -18,40 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __V_MEASURE_H -#define __V_MEASURE_H - -#pragma once -#include - -namespace raft { -namespace stats { - /** - * @brief Function to calculate the v-measure between two clusters - * - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T - * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels - * @param stream: the cudaStream object - * @param beta: v_measure parameter + * DISCLAIMER: this file is deprecated: use v_measure.cuh instead */ -template -double v_measure(const T* truthClusterArray, - const T* predClusterArray, - int size, - T lowerLabelRange, - T upperLabelRange, - cudaStream_t stream, - double beta = 1.0) -{ - return detail::v_measure( - truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream, beta); -} -}; // end namespace stats -}; // end namespace raft +#pragma once + +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "v_measure.cuh" diff --git a/cpp/include/raft/stats/weighted_mean.hpp b/cpp/include/raft/stats/weighted_mean.hpp index 5b3f4678d8..8bc4bf4623 100644 --- a/cpp/include/raft/stats/weighted_mean.hpp +++ b/cpp/include/raft/stats/weighted_mean.hpp @@ -18,84 +18,14 @@ * Please use the cuh version instead. 
*/ -#ifndef __WEIGHTED_MEAN_H -#define __WEIGHTED_MEAN_H - -#pragma once - -#include - -namespace raft { -namespace stats { - /** - * @brief Compute the weighted mean of the input matrix with a - * vector of weights, along rows or along columns - * - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @param mu the output mean vector - * @param data the input matrix - * @param weights weight of size D if along_row is true, else of size N - * @param D number of columns of data - * @param N number of rows of data - * @param row_major data input matrix is row-major or not - * @param along_rows whether to reduce along rows or columns - * @param stream cuda stream to launch work on + * DISCLAIMER: this file is deprecated: use weighted_mean.cuh instead */ -template -void weightedMean(Type* mu, - const Type* data, - const Type* weights, - IdxType D, - IdxType N, - bool row_major, - bool along_rows, - cudaStream_t stream) -{ - detail::weightedMean(mu, data, weights, D, N, row_major, along_rows, stream); -} -/** - * @brief Compute the row-wise weighted mean of the input matrix with a - * vector of column weights - * - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @param mu the output mean vector - * @param data the input matrix (assumed to be row-major) - * @param weights per-column means - * @param D number of columns of data - * @param N number of rows of data - * @param stream cuda stream to launch work on - */ -template -void rowWeightedMean( - Type* mu, const Type* data, const Type* weights, IdxType D, IdxType N, cudaStream_t stream) -{ - weightedMean(mu, data, weights, D, N, true, true, stream); -} +#pragma once -/** - * @brief Compute the column-wise weighted mean of the input matrix with a - * vector of row weights - * - * @tparam Type the data type - * @tparam IdxType Integer type used to for addressing - * @param mu the output mean vector - * @param data the input matrix (assumed to be 
row-major) - * @param weights per-row means - * @param D number of columns of data - * @param N number of rows of data - * @param stream cuda stream to launch work on - */ -template -void colWeightedMean( - Type* mu, const Type* data, const Type* weights, IdxType D, IdxType N, cudaStream_t stream) -{ - weightedMean(mu, data, weights, D, N, true, false, stream); -} -}; // end namespace stats -}; // end namespace raft +#pragma message(__FILE__ \ + " is deprecated and will be removed in a future release." \ + " Please use the cuh version instead.") -#endif \ No newline at end of file +#include "weighted_mean.cuh" diff --git a/cpp/test/spatial/ball_cover.cu b/cpp/test/spatial/ball_cover.cu index d1bfe4a2e4..a23262fc8e 100644 --- a/cpp/test/spatial/ball_cover.cu +++ b/cpp/test/spatial/ball_cover.cu @@ -18,8 +18,8 @@ #include "spatial_data.h" #include #include -#include -#include +#include +#include #include #if defined RAFT_NN_COMPILED #include From 8b97c1baa1a574c8fa45cd87366e58107a19ebe8 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Fri, 2 Sep 2022 14:06:30 +0200 Subject: [PATCH 13/58] Minor follow-up fixes for ivf-flat (#796) Small fixes to ivf-flat and its dependency warp_sort: - fix the template parameter for the call of `calc_smem_size_for_block_wide` (may reduce the use of shared memory); - force initialize warp_sort internal value buffers to avoid uninitialized output in case of very small input data size; - small readability fixes. Authors: - Artem M. 
Chirkin (https://github.com/achirkin) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/796 --- cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh | 9 ++++----- .../raft/spatial/knn/detail/topk/warpsort_topk.cuh | 6 +++++- cpp/test/spatial/ann_ivf_flat.cu | 7 +++++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh index fab845396c..d8219a48f9 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh @@ -696,8 +696,8 @@ __global__ void __launch_bounds__(kThreadsPerBlock) copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); __syncthreads(); - topk::block_sort queue( - k, interleaved_scan_kernel_smem + query_smem_elems * sizeof(T)); + using block_sort_t = topk::block_sort; + block_sort_t queue(k, interleaved_scan_kernel_smem + query_smem_elems * sizeof(T)); { using align_warp = Pow2; @@ -766,8 +766,7 @@ __global__ void __launch_bounds__(kThreadsPerBlock) } // Enqueue one element per thread - constexpr float kDummy = Ascending ? upper_bound() : lower_bound(); - const float val = valid ? static_cast(dist) : kDummy; + const float val = valid ? static_cast(dist) : block_sort_t::queue_t::kDummy; const size_t idx = valid ? 
static_cast(list_indices[list_offset + vec_id]) : 0; queue.add(val, idx); } @@ -826,7 +825,7 @@ void launch_kernel(Lambda lambda, std::min(max_query_smem / sizeof(T), Pow2::roundUp(index.dim())); int smem_size = query_smem_elems * sizeof(T); constexpr int kSubwarpSize = std::min(Capacity, WarpSize); - smem_size += raft::spatial::knn::detail::topk::calc_smem_size_for_block_wide( + smem_size += raft::spatial::knn::detail::topk::calc_smem_size_for_block_wide( kThreadsPerBlock / kSubwarpSize, k); // power-of-two less than cuda limit (for better addr alignment) diff --git a/cpp/include/raft/spatial/knn/detail/topk/warpsort_topk.cuh b/cpp/include/raft/spatial/knn/detail/topk/warpsort_topk.cuh index 017678afbb..23448b6dc4 100644 --- a/cpp/include/raft/spatial/knn/detail/topk/warpsort_topk.cuh +++ b/cpp/include/raft/spatial/knn/detail/topk/warpsort_topk.cuh @@ -135,6 +135,7 @@ constexpr auto calc_capacity(int k) -> int template class warp_sort { static_assert(isPo2(Capacity)); + static_assert(std::is_default_constructible_v); public: /** @@ -158,6 +159,7 @@ class warp_sort { #pragma unroll for (int i = 0; i < kMaxArrLen; i++) { val_arr_[i] = kDummy; + idx_arr_[i] = IdxT{}; } } @@ -280,6 +282,7 @@ class warp_sort_filtered : public warp_sort { #pragma unroll for (int i = 0; i < kMaxBufLen; i++) { val_buf_[i] = kDummy; + idx_buf_[i] = IdxT{}; } } @@ -371,6 +374,7 @@ class warp_sort_immediate : public warp_sort { #pragma unroll for (int i = 0; i < kMaxArrLen; i++) { val_buf_[i] = kDummy; + idx_buf_[i] = IdxT{}; } } @@ -429,9 +433,9 @@ template